// Session setup: reset Stata and apply the settings used by the rest of this script.
clear all
set more off, perm
set mem 10000000
set matsize 10000
version 13

*********************************************************** 
*** Table and figures for JPE revisions: summary stats  ***
*********************************************************** 

** Set file paths
// The path_code global is referenced before paths.do runs, so it must already be defined by the caller.
do "$path_code/paths.do"

** Set graph scheme
// NOTE(review): the path global used in the cd below is presumably defined by paths.do -- confirm.
// NOTE(review): "fb" is presumably a custom graph scheme that must be installed; ", perm" makes it the default for later sessions too.
cd "$path/code/analyze"
set scheme fb, perm

********************************************************************************
********************************************************************************

** Calculation of RGGVY funds released and households electrified through 2011, for Section 2 of main text
{
	// Inputs taken from p29 of Sreekumar and Dixit (2011); each is scaled by 10 * 1e6
global funds     = 25335 * 10 * 1e6
global hhs       = 1.75 * 10 * 1e6
global elec_rate = 0.44

	// Express the 2011 rupee total in 2019 rupees:
	// normalize the CPI series by its 2019 value, then divide funds by the 2011 value of that index
import excel "$nss/INDCPIALLAINMEI.xls", sheet("FRED Graph") cellrange(A11:B33) firstrow clear
generate year = year(observation_date)
summarize IND if year==2019
local cpi2019 = r(mean)
generate INDdef = IND/`cpi2019'
summarize INDdef if year==2011
local inflator = r(mean)
global funds2019 = ${funds}/`inflator'

	// 2011 rupees to 2011 USD: day-weighted average of monthly INR-to-USD rates
	// (https://www.x-rates.com/average/?from=INR&to=USD&amount=1&year=2011)
global xrate2011 = (0.022046*31 + 0.022041*28 + 0.022254*31 + ///
	0.022579*30 + 0.022292*31 + 0.022291*30 + ///
	0.022511*31 + 0.022068*31 + 0.021082*30 + ///
	0.020336*31 + 0.019764*30 + 0.019082*31)/365
display 1/${xrate2011}
display ${funds}*${xrate2011}/1e9

	// 2019 rupees to 2019 USD: same day-weighted average, applied to the inflated total
	// NOTE(review): the original comment repeated the year=2011 URL; presumably the 2019 page of the same site was the source -- confirm
global xrate2019 = (0.014158*31 + 0.014039*28 + 0.014356*31 + ///
	0.014403*30 + 0.014328*31 + 0.014391*30 + ///
	0.014542*31 + 0.014064*31 + 0.014009*30 + ///
	0.014079*31 + 0.013992*30 + 0.014043*31)/365
display 1/${xrate2019}
display ${funds2019}*${xrate2019}/1e9

	// We'll put the 2011 USD number in the main text, since inflating to 2019 USD only makes it slightly larger

	// Share of all households: denominator is the sum of no_hh11 over the 2011 census file
use "$pca/pca_census11.dta", clear
summarize no_hh11
global hh_total = r(sum)
display ${hhs}/${hh_total} // 10.4% of all rural households

	// Share of previously unelectrified households, using the weighted 2005 NSS mean of elec_q_yn
use "$panel/panel_dataset_dd_nss_uncollapsed.dta", clear
keep if year==2005
generate weight_composite = weight_normalized * tot_p_dist
summarize elec_q_yn [aweight=weight_composite]
global hh_unelec = ${hh_total} * (1-r(mean))
display ${hhs}/${hh_unelec} // 21.5% of all previously unelectrified rural households

	// Share of previously unelectrified households, using the S&D electrification rate instead
global hh_unelec_sd = ${hh_total} * (1-${elec_rate})
display ${hhs}/${hh_unelec_sd} // 18.5% of all previously unelectrified rural households

}

********************************************************************************
********************************************************************************

** Figure 1: scatter plot of World Bank GDP vs. electricity access gap 
{

// ---- Electrification rate: read wide file, keep 2000-2018, reshape to long ----
// read in electrification rate data (no header row is used, so columns arrive as v1, v2, ...)
insheet using "$other/elec_rate.csv", comma clear

// get rid of extraneous rows & variables
drop if _n <3
drop v5-v44
drop v64 v65 v66

// keep 2000 - 2018 (more recent isn't that well populated)
rename (v45-v63) (elec_2000 elec_2001 elec_2002 elec_2003 elec_2004 elec_2005 elec_2006 ///
   elec_2007 elec_2008 elec_2009 elec_2010 elec_2011 elec_2012 elec_2013 elec_2014 ///
   elec_2015 elec_2016 elec_2017 elec_2018)
   
// drop the row that holds the column titles
drop if v1 == "Country Name"

// prepare for reshape wide --> long
rename (v1 v2) (country country_code)
keep country* elec*

// reshape
reshape long elec_, i(country country_code) j(year)
rename elec_ electrification_rate

// save for merge
tempfile electmp
save "`electmp'"


// ---- Population: same steps as above ----
insheet using "$other/population.csv", comma clear
drop if _n <3
drop v5-v44
drop v64 v65 v66
rename (v45-v63) (pop_2000 pop_2001 pop_2002 pop_2003 pop_2004 pop_2005 pop_2006 ///
   pop_2007 pop_2008 pop_2009 pop_2010 pop_2011 pop_2012 pop_2013 pop_2014 ///
   pop_2015 pop_2016 pop_2017 pop_2018)
   
drop if v1 == "Country Name"
rename (v1 v2) (country country_code)
keep country* pop*

reshape long pop_, i(country country_code) j(year)
rename pop_ population
// save for merge
tempfile poptmp
save "`poptmp'"


// ---- GDP per capita: same steps as above ----
insheet using "$other/gdp_pc_2010.csv", comma clear
drop if _n <3
drop v5-v44
drop v64 v65 v66
rename (v45-v63) (gdppc_2000 gdppc_2001 gdppc_2002 gdppc_2003 gdppc_2004 gdppc_2005 gdppc_2006 ///
   gdppc_2007 gdppc_2008 gdppc_2009 gdppc_2010 gdppc_2011 gdppc_2012 gdppc_2013 gdppc_2014 ///
   gdppc_2015 gdppc_2016 gdppc_2017 gdppc_2018)
   
drop if v1 == "Country Name"
rename (v1 v2) (country country_code)
keep country* gdppc*

reshape long gdppc_, i(country country_code) j(year)
rename gdppc_ gdp_per_capita

// merge three datasets together (country-year level)
merge 1:1 country country_code year using "`electmp'", nogen 
merge 1:1 country country_code year using "`poptmp'", nogen 

// flag non-countries (regional / income / lending aggregates) for dropping
// "IBD" is the World Bank code for the IBRD-only aggregate; the original list only had "IBRD",
// which matches nothing. "IBRD" is kept so no previously dropped row can reappear.
// NOTE(review): newer World Bank extracts may also contain the aggregates AFE and AFW -- confirm they are absent here.
gen not_a_country = 0
replace not_a_country = 1 if country_code == "ARB" | country_code == "CSS"  ///
     | country_code == "CEB" | country_code == "CHI" | country_code == "EAR" ///
	 | country_code == "EAS" | country_code == "TEA" | country_code == "EAP" ///
	 | country_code == "EMU" | country_code == "ECS" | country_code == "TEC" ///
	 | country_code == "ECA" | country_code == "EUU" | country_code == "FCS" ///
	 | country_code == "HPC" | country_code == "HIC" | country_code == "IBRD" ///
	 | country_code == "IBD" ///
	 | country_code == "IBT" | country_code == "IDB" | country_code == "IDX" ///
	 | country_code == "IDA" | country_code == "IMN" | country_code == "LTE" ///
	 | country_code == "LCN" | country_code == "LAC" | country_code == "TLA" ///
	 | country_code == "LDC" | country_code == "LMY" | country_code == "LIC" ///
	 | country_code == "LMC" | country_code == "MEA" | country_code == "TMN" ///
	 | country_code == "MNA" | country_code == "MIC" | country_code == "NAC" ///
	 | country_code == "INX" | country_code == "OED" | country_code == "OSS" ///
	 | country_code == "PSS" | country_code == "PST" | country_code == "PRE" ///
	 | country_code == "SST" | country_code == "SAS" | country_code == "TSA" ///
	 | country_code == "SSF" | country_code == "TSS" | country_code == "SSA" ///
	 | country_code == "UMC" | country_code == "WLD" 

drop if not_a_country == 1



// set up globals for graphs: India's GDP per capita and electrification rate in 2011 and 2005
// (full variable names are used so the code does not depend on variable abbreviation)
sum gdp_per_capita if year == 2011 & country == "India"
global indgdp11 = `r(mean)'

sum electrification_rate if year == 2011 & country == "India"
global indelec11 = `r(mean)'

sum gdp_per_capita if year == 2005 & country == "India"
global indgdp05 = `r(mean)'

sum electrification_rate if year == 2005 & country == "India"
global indelec05 = `r(mean)'


// The first two plots filter on year == 2020, which is outside the 2000-2018 range kept above,
// so they draw no points; they appear to exist only to supply the two legend keys named in legend(order()).
// The third plot draws the actual data: 2018 countries below 80 percent access, weighted by population.
twoway ///
     (scatter gdp_per_capita electrification_rate if year == 2020, mcolor(gs10)  msize(large) msymbol(+)) ///
     (scatter gdp_per_capita electrification_rate if year == 2020 & electrification_rate < 80, msize(large) mcolor(black) msymbol(circle_hollow)) ///
     (scatter gdp_per_capita electrification_rate if year == 2018 & electrification_rate < 80 [w=population], mcolor(black) msymbol(circle_hollow)) ///
    , ///
	yline($indgdp11) xline($indelec11) ///
	xtitle("Share of population with access to electricity", size(medsmall)) ytitle("GDP per capita" "(2010 USD)", size(medsmall)) ///
	ylabel(,labsize(medsmall)) xlabel(,labsize(medsmall)) ///
	legend(order(1 "India, 2011" 2 "World, 2018") rows(2) position(11) ring(0) size(medsmall)) aspect(0.4)	
	
cap erase "$texfig/figure_gdp_unelec-eps-converted-to.pdf"
graph export "$texfig/figure_gdp_unelec.eps", replace 
		


// ---- Numbers quoted in the figure note ----
// The data in memory are filtered destructively from here on (no preserve).
keep if electrification_rate < 80 & year == 2018
drop if electrification_rate == . | gdp_per_capita == . | population == .

egen totpop = total(population)
gen totpop_bn = totpop / 1000000000

sum totpop_bn
global totpop_bn = string(`r(mean)', "%9.2f")
di $totpop_bn


// population totals for countries at or below India's 2011 GDP per capita, and within 20 percent above it;
// both are missing for countries outside the condition, so r(N) below counts qualifying countries
egen totpop_low = total(population) if gdp_per_capita <= $indgdp11		
				
egen totpop_low20 = total(population) if gdp_per_capita <= 1.2*$indgdp11		

gen popshare_low = (totpop_low / totpop) * 100
gen popshare_low20 = (totpop_low20 / totpop) * 100

sum population
global countries = string(`r(N)')

sum popshare_low
global low_countries = string(`r(N)')
global low_country_pct = string(`r(mean)', "%2.0f")
local low_pct = `r(mean)'
local low_n = `r(N)'

sum popshare_low20
global low_countries20 = string(`r(N)')
global low_country_pct20 = string(`r(mean)', "%2.0f")
local low20_pct = `r(mean)'
local low20_n = `r(N)'

local low_pct_diff = (`low20_pct' - `low_pct')
global low_pct_diff = string(`low_pct_diff', "%9.0f")

local low_n_diff = `low20_n' - `low_n'
global low_n_diff = string(`low_n_diff', "%2.0f")


   // MAKE FIGURE
   // close any handle left open by an earlier failed run, as the other sections of this file do
cap file close tablewrite
file open tablewrite using "$texfig/figure_gdp_unelec.tex", write text replace
 
file write tablewrite "\begin{figure}[p]\centering" _n
file write tablewrite "\caption{Electricity access and per-capita GDP -- India vs.\ the world}" _n
file write tablewrite "\label{fig:gdp_unelec}" _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "{\includegraphics[width=.74\textwidth, trim={1mm 21mm 1mm 17mm}, clip]{${texfig_short}/figure_gdp_unelec.eps}} " _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "\caption*{\scriptsize Note. --- This figure plots country-level per-capita GDP (in 2010 USD)" _n
file write tablewrite "vs.\ the share of the population with electricity access.  " _n
file write tablewrite "Grey lines indicate India's levels in 2011, at the end of our study period. " _n
file write tablewrite "Black circles show all $countries countries with below 80\% electricity access in 2018, " _n
file write tablewrite "with a total population of $totpop_bn billion people. Circle sizes are scaled by each country's population. " _n
file write tablewrite "$low_countries countries (containing $low_country_pct\% of the people) had 2018 per-capita GDPs " _n
file write tablewrite "lower than India's 2011 per-capita GDP. An additional $low_n_diff countries (containing $low_pct_diff\% " _n
file write tablewrite "of the people) had per-capita GDPs within 20\% of 2011 India. " _n
file write tablewrite "Data are from \textcite{worldbank_open_2021}." _n
file write tablewrite "}" _n
file write tablewrite "\end{figure}" _n
file close tablewrite		

}

********************************************************************************
********************************************************************************

** Figure 2: Map of RGGVY districts
{
// Writes the LaTeX wrapper for the district map; the map PDF itself is not produced in this section.
cap file close tablewrite
file open tablewrite using "$texfig/figure_rggvy_districts.tex", write text replace
 
file write tablewrite "\begin{figure}[p]\centering" _n
file write tablewrite "\caption{Indian districts by RGGVY implementation phase}" _n
file write tablewrite "\label{fig:rggvy_districts}" _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "{\includegraphics[width=\textwidth, trim={10mm 5mm 10mm 0mm}, clip]{${texfig_short}/rggvy_district_map_all_hash_grayscale_wide.pdf}} " _n
file write tablewrite "\vspace{-4mm}" _n
file write tablewrite "\captionsetup{width = \textwidth}" _n
file write tablewrite "\caption*{\scriptsize Note. --- " _n
// grammar fix in the emitted note: "We shades" -> "We shade"
file write tablewrite "We shade 2001 districts by RGGVY coverage status.  " _n
file write tablewrite "Dark districts were covered under the 10th Plan (RGGVY's first wave), " _n
file write tablewrite "light districts were covered under the 11th Plan (RGGVY's second wave),  " _n
file write tablewrite "cross-hatched districts were covered under both 10th and 11th Plans, " _n
file write tablewrite "and white districts were not covered by RGGVY. " _n
file write tablewrite "In 2001, India had 584 districts across its 28 states and 7 Union Territories.  " _n
file write tablewrite "RGGVY covered 530 total districts in 27 states (neither Goa nor the Union Territories were eligible). " _n
file write tablewrite "30 districts were split between the 10th and 11th Plans; " _n
file write tablewrite "23 states contain both 10th- and 11th-Plan districts. " _n
file write tablewrite "}" _n
file write tablewrite "\end{figure}" _n

file close tablewrite

}

********************************************************************************
********************************************************************************

** Figure 3: Histograms of running variable 
{
use "$panel/panel_dataset_no_lights.dta", clear

  // Histogram of 2001 village population (bandwidth) 
  // Three overlaid histograms on 150-450: all villages, villages with vplan4<11, and the
  // single-habitation subset with pop_mismatch20==0; the scatteri draws a vertical line at 300.
twoway	(hist tot_p if pca01_id!=. & tot_p<=450 & tot_p>=150, freq fc(white) lc(black) lw(thin) w(5)) ///
		(hist tot_p if pca01_id!=. & tot_p<=450 & tot_p>=150 & vplan4<11, freq fc(gs11) lc(black) lw(thin) w(5)) ///
		(hist tot_p if pca01_id!=. & tot_p<=450 & tot_p>=150 & vplan4<11 & sing_h==1 & pop_mismatch20==0, freq fc(gs4) lc(black) lw(thin) w(5)) ///
		(scatteri 0 300 2300 300 , c(l) m(i) lc(black) lw(thick) lp(solid)), ///
		ytitle("Villages", size(large))  xtitle("2001 village population", size(large)) /// 
		/*title("Indian Villages by 2001 Population", color(black) size(medlarge))*/ ///
		graphregion(lcolor(white)) graphregion(color(white)) plotregion(fcolor(white)) ///
		xlabel(150 200 250 300 350 400 450, labsize(large)) ylabel(0 1000 2000, nogrid angle(0) labsize(large)) ///
		legend(c(1) order(1 2 3) size(large) pos(6) ///
			lab(1 "All villages") ///
			lab(2 "Villages in 10th-Plan districts") ///
			lab(3 "Single-habitation villages in 10th-Plan districts"))
cap erase "$texfig/figure_pop_hist_150_450-eps-converted-to.pdf"
graph export "$texfig/figure_pop_hist_150_450.eps", replace 

  // Histograms of 2001/2011 village population (full support) 
  // Top-code at 4000. Stata treats missing as larger than any number, so an unguarded
  // "x>4000" would also recode missing populations to 4000 and add them to the top bar;
  // the !missing() guard keeps missing values missing.
gen tot_p_top = tot_p
replace tot_p_top = 4000 if tot_p>4000 & !missing(tot_p)
gen tot_p11_top = tot_p11
replace tot_p11_top = 4000 if tot_p11>4000 & !missing(tot_p11)

  // The scatteri layers draw the "Bandwidths" bracket over the 150-450 range
twoway	(histogram tot_p_top, freq fcolor(gs4) lcolor(black) start(0.5) width(100)) ///
		(histogram tot_p11_top, freq fcolor(none) lcolor(gs11) start(0.5) width(100)  lw(medthick)) ///
		(scatteri 48000 150 (12) "" 48000 380 (12) "Bandwidths", color(black) ///
			msymbol(p) connect(l) lwidth(medium) mlabcolor(black) lp(l) mlabsize(medium)) ///
		(scatteri 48000 150 46200 150, color(black) msymbol(p) connect(l) lwidth(medium) lp(l)) ///
		(scatteri 48000 450 46200 450, color(black) msymbol(p) connect(l) lwidth(medium) lp(l))  ///  
		(scatteri 48000 320 48000 450, color(black) msymbol(p) connect(l) lwidth(medium) lp(l)) ///
		, ///       
		xtitle("Village population", size(large))  ytitle("Thousand villages", size(large)) ///
		graphregion(lcolor(white)) graphregion(color(white)) plotregion(fcolor(white))  xlabel(, labsize(large)) ///
		 /*title("Indian Villages by Population", size(medium) color(black)) */ ///
		legend(order(1 "2001 Census" 2 "2011 Census") c(1) size(large) bmargin(medlarge) pos(6)) ///
		ylabel(,nogrid angle(0) labsize(large)) yla(0  10000 "10" 20000 "20" 30000 "30" 40000 "40" 50000 "50") 
cap erase "$texfig/figure_pop_hist_wide-eps-converted-to.pdf" 
graph export "$texfig/figure_pop_hist_wide.eps", replace 


  // LaTeX wrapper placing the two exported histograms side by side
cap file close tablewrite
file open tablewrite using "$texfig/figure_pop_hist.tex", write text replace
 
file write tablewrite "\begin{figure}[p]\centering" _n
file write tablewrite "\caption{Density of RD running variable}" _n
file write tablewrite "\label{fig:pop_hist}" _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "{\includegraphics[width=0.49\textwidth, trim={2mm 4mm 0mm 0mm}, clip]{${texfig_short}/figure_pop_hist_wide.eps}} " _n
file write tablewrite "{\includegraphics[width=0.49\textwidth, trim={0mm 4mm 2mm 0mm}, clip]{${texfig_short}/figure_pop_hist_150_450.eps}} " _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "\caption*{\scriptsize Note. --- " _n
file write tablewrite "The left histogram shows village populations for 2001 (dark) and 2011 (light), " _n
file write tablewrite "top-coding each distribution at 4000. " _n
file write tablewrite "The right histogram zooms in on villages close to RGGVY's 300-person population cutoff," _n
file write tablewrite "with 2001 populations between 150 and 450 (slightly wider than our optimal RD bandwidths). " _n
file write tablewrite "Dark bars show the sample of single-habitation 10th-Plan villages used in our RD analysis, " _n
file write tablewrite "relative to all Indian villages (white) and all villages in 10th-Plan districts (light gray).  " _n
file write tablewrite "}" _n
file write tablewrite "\end{figure}" _n

file close tablewrite

}

****************************************************************** 
****************************************************************** 

** Table 1: Summary stats table for main text
{

	// ---- Panel A: pre-RGGVY village-level covariates ----
use "$panel/panel_dataset_no_lights.dta", clear
keep pca01_id pop_non_zero tot_p vplan4 sing_h sample sample_h pop_mismatch20 corr_state ///
	vd_geo_a_01 work_p_01 work_pooled_ag_p_01 work_pooled_ot_p_01 ///
	vd_pwr_d_any_01 vd_pwr_d_dom_01 vd_pwr_d_agr_01 vd_pwr_d_all_01 ///
	pct_irr_01 lit_p_01 vd_geo_k_town_01 vd_wat_d_any_01

	// Attach 2005 DISE enrollment; unmatched using-only rows are discarded
merge 1:1 pca01_id using "$panel/pca_school_selected_WIDE_new.dta", ///
	keep(master match) keepusing(tot_enroll*2005) generate(merge_dise)

	// Attach 2005 Economic Census counts: clean the EC panel in a side session, reshape to one row per village
preserve
	use "$panel/ec_shrid_pc01_panel.dta", clear
	tabulate count_dup_vill_ec, missing
	drop if count_dup_vill_ec==1
	tabulate count_dup_shrid, missing
	drop if count_dup_shrid!=0
	tabulate flag_ec
	drop if flag_ec==1
	keep pca01_id ec_*
	reshape wide ec_employees ec_firms, i(pca01_id) j(ec_year)
	drop *1990 *1998 *2013
	drop if ec_firms2005==. & ec_employees2005==.
	assert ec_employees2005!=. if ec_firms2005!=.
	assert ec_firms2005!=0 
	tempfile ec
	save `ec'
restore
merge 1:1 pca01_id using `ec', keep(master match) generate(merge_ec)

	// Sample definitions for the three Panel A columns (each nests the previous one)
local tab1_if1 "pop_non_zero==1  & tot_p>=150 & tot_p<=450"
local tab1_if2 "`tab1_if1' & vplan4<11"
local tab1_if3 "`tab1_if2' & corr_state==1 & sample==1 & sample_h==1 & sing_h==1 & pop_mismatch20==0"

	// Means and SDs: row index r over covariates, column index c over samples
local r = 0
foreach v of varlist work_pooled_ag_p_01 work_pooled_ot_p_01 ec_firms2005 lit_p_01 tot_enroll_all2005 vd_pwr_d_any_01 vd_geo_k_town_01 {
	local ++r
	local name`r' = subinstr("`v'","_","\_",.)
	forvalues c = 1/3 {
		quietly summarize `v' if `tab1_if`c''
		local mean`c'`r' = string(r(mean),"%9.2f")
		local sd`c'`r' = string(r(sd),"%9.2f")
	}
}
	// Village counts per column
forvalues c = 1/3 {
	quietly count if `tab1_if`c''
	local N`c' = string(r(N),"%9.0fc")
}

	// Row labels (these replace the escaped variable names stored inside the loop)
local name1 = "Agricultural workers $\big/$ population (2001)"
local name2 = "Non-agricultural workers $\big/$ population (2001)"
local name3 = "Number of firms in village (2005)"
local name4 = "Literacy rate (2001)"
local name5 = "School enrollment (2005--06 headcount)"
local name6 = "Electric access anywhere in village (2001)"
local name7 = "Distance to nearest town (km)"



	// ---- Panel B: 2005 district covariates (NSS) ----
use "$panel/panel_dataset_dd_nss.dta", clear
keep if year==2005
unique st_code dt_code
assert r(unique)==r(N)

	// Pull the stored pre-trend regression rows (one per outcome) and stack them under the district data
preserve
	use "$results/nss_reg_results.dta", clear
	keep if panel=="district-year collapsed"
	keep if regs=="ols (pretrends)"
	keep if inlist(yvar,"mth_pc_exp","elec_quantity","elec_q_yn","elec_light","fan","tv","fridge","ac")
	keep if ytag==""
	keep if ifs==" if year<2010"
	keep if inlist(fes, "year c.year#exp05_st_4ile c.year#exp05_ntl_10ile c.year#st_code stdt")
	unique yvar
	assert r(unique)==r(N)
	tempfile pretrends
	save `pretrends'
restore
append using `pretrends'

	// Columns 4 and 5: means and SDs by treat_x_post05; column 6: stored beta, se, and stars from pvalue
local s = 0
foreach v of varlist mth_pc_exp elec_q_yn elec_light fan tv fridge ac {
	local ++s
	quietly summarize `v' if treat_x_post05==1
	local mean4`s' = string(r(mean),"%9.2f")
	local sd4`s' = string(r(sd),"%9.2f")
	quietly summarize `v' if treat_x_post05==0
	local mean5`s' = string(r(mean),"%9.2f")
	local sd5`s' = string(r(sd),"%9.2f")
	quietly summarize beta if yvar=="`v'"
	local mean6`s' = string(r(mean), "%9.3f")
	quietly summarize se if yvar=="`v'"
	local sd6`s' = string(r(mean), "%9.3f")
	quietly summarize pvalue if yvar=="`v'"
	// a missing p-value fails every comparison and therefore gets no stars
	local stars`s' = cond(r(mean) <= 0.01, "$^{***}$", ///
		cond(r(mean) <= 0.05, "$^{**}$", ///
		cond(r(mean) <= 0.10, "$^{*}$", "")))
}
quietly count if treat_x_post05==1
local N4 = string(r(N),"%9.0fc")
quietly count if treat_x_post05==0
local N5 = string(r(N),"%9.0fc")

local name_nss1 = "Expenditure per capita (Rs/month)"
local name_nss2 = "Share households consuming any electricity"
local name_nss3 = "Share households with electric lighting"
local name_nss4 = "Share households with electric fan"
local name_nss5 = "Share households with TV"
local name_nss6 = "Share households with refrigerator"
local name_nss7 = "Share households with air conditioning"

	// ---- Write the LaTeX table ----
capture file close tablewrite
file open tablewrite using "$textab/table_sumstats.tex", write text replace

	// Preamble and Panel A header
file write tablewrite ///
	"\begin{table}[h!]\centering" _n ///
	" \caption{Summary statistics prior to RGGVY \label{tab:sumstats}}" _n ///
	" \vspace{-0.2cm}" _n ///
	" \small" _n ///
	"\begin{tabular}{lcccccccc} " _n ///
	" \hline" _n ///
	"\multicolumn{1}{l}{\textbf{A.} {Village-level covariates, 150--450 population~~~~}} " _n ///
	" & $\begin{matrix}\text{All} \\ \text{Districts} \end{matrix}$ " _n ///
	" & $\begin{matrix}\text{10th-Plan} \\ \text{Districts} \end{matrix}$ " _n ///
	" & $\begin{matrix}\text{RD} \\ \text{Sample} \end{matrix}$ \\" _n ///
	"\vspace{-0.49cm}" _n ///
	"\\" _n ///
	"\hline" _n ///
	"\vspace{-0.33cm}" _n ///
	"\\" _n ///
	"[0.15em] " _n

	// Panel A rows: mean line, then SD line in parentheses
forvalues i = 1/`r' {
	file write tablewrite ///
		"~~`name`i'' & $`mean1`i''$ & $`mean2`i''$ & $`mean3`i''$ \\" _n ///
		"[-0.15em]" _n ///
		" ~~~~~~ & $(`sd1`i'')$ & $(`sd2`i'')$ & $(`sd3`i'')$ \\" _n ///
		"[0.345em]" _n
}

	// Panel A footer and Panel B header
file write tablewrite ///
	"~~Number of villages & `N1' & `N2' & `N3'  \\" _n ///
	"[1.55em]" _n ///
	"\multicolumn{1}{l}{\textbf{B.} {District-level covariates, 2005 NSS~~~~}} " _n ///
	" & $\begin{matrix}\text{10th-Plan} \\ \text{Districts} \end{matrix}$ " _n ///
	" & $\begin{matrix}\text{11th-Plan} \\ \text{Districts} \end{matrix}$ " _n ///
	" & $\begin{matrix}\text{Pre-trend} \\ \text{Estimates} \end{matrix}$ \\" _n ///
	"\vspace{-0.49cm}" _n ///
	"\\" _n ///
	"\hline" _n ///
	"\vspace{-0.33cm}" _n ///
	"\\" _n ///
	"[0.15em] " _n

	// Panel B rows: mean line with starred estimate, then SDs in parentheses and SE in brackets
forvalues i = 1/`s' {
	file write tablewrite ///
		"~~`name_nss`i'' & $`mean4`i''$ & $`mean5`i''$ & $`mean6`i''$`stars`i'' \\" _n ///
		"[-0.15em]" _n ///
		" ~~~~~~ & $(`sd4`i'')$ & $(`sd5`i'')$ & $[`sd6`i'']$ \\" _n ///
		"[0.345em]" _n
}

	// Panel B footer and table note
file write tablewrite ///
	"~~Number of districts & `N4' & `N5'  \\" _n ///
	"[0.05em]" _n ///
	"\hline" _n ///
	"\end{tabular}" _n ///
	"\vspace{-3.5mm}" _n ///
	"\captionsetup{width=\textwidth}" _n ///
	"\caption*{\scriptsize Note. ---  " _n ///
	"Panel A reports means and standard deviations of village-level covariates " _n ///
	"from the 2001 Census, the 2005 Economic Census, and 2005--06 DISE school data. " _n ///
	"All three columns include only villages with 2001 populations between 150 and 450, " _n ///
	"which is slightly wider than our optimal RD bandwidths. " _n ///
	"The middle column includes districts in the first wave of RGGVY implementation. " _n ///
	"The right column further restricts the sample to single-habitation " _n ///
	"villages in 10th-Plan districts, in states with reliable village shapefiles. " _n ///
	"Panel B reports district-level means and standard deviations for " _n ///
	"10th- vs.\ 11th-Plan districts using the 2005 NSS (representative at " _n ///
	"the household level). " _n ///
	"The right column reports district-level pre-trend estimates using 2000 and 2005 " _n ///
	"NSS data, comparing 10th vs.\ 11th Plans, 2005 vs.\ 2000 " _n ///
	"(including state-specific linear trends; standard errors in brackets). " _n ///
	"Appendix Tables \ref*{tab:nss_first_stage_pretrends}--\ref*{tab:nss_reduced_form_pretrends} " _n ///
	"report these regression results in full. " _n ///
	"Significance: *** \$p < 0.01\$, ** \$p < 0.05\$, * \$p < 0.10\$." _n ///
	"}" _n ///
	"\end{table}" _n

file close tablewrite
	
}
	
********************************************************************************
********************************************************************************

** Figure A6: Histogram of RGGVY implementation intensity (share of villages) 
{

use "$panel/panel_dataset_full.dta", clear

	// Bring in RGGVY district-level administrative data (10th Plan only)
	// The admin file is summarized in a side session and collapsed to one row per district.
preserve
use "$rggvy/rggvy_district_progress_X_XI_processed.dta", clear
egen dt_group = group(st_code dt_code)
egen temp_group = group(plan implement_type)
	// diagnostics only: distinct states / districts / DPRs by plan and implementation type
unique st_code, by(temp_group) gen(uniq_st)
unique dt_group, by(temp_group) gen(uniq_dt)
unique dpr_code, by(temp_group) gen(uniq_dpr)
unique st_code if plan==10
unique st_code if plan==11
unique dt_group if plan==10
unique dt_group if plan==11
	// award dates earlier than the sanction date are moved up to the sanction date
count if award_date<sanction_date
replace award_date = sanction_date if award_date<sanction_date
	// this floor only touches plan==11 rows, which are dropped on the next line
replace award_date = max(award_date,17553) if plan==11
keep if plan==10
collapse (min) min_award_date=award_date (max) max_award_date=award_date ///
	(sum) award_cost total_released achiev_UDE achiev_ELEC achiev_BPL ///
	, by(st_code dt_code)	
gen med_award_date = round((min_award_date+max_award_date)/2,1)
format %td med_award_date	
tempfile admin
save `admin'
restore	
merge m:1 st_code dt_code using `admin' 

	// RGGVY admin splitter 1: (# villages treated) / (# villages in district)
	// temp2 spreads the count of villages with tot_p>=300 to every village in the district
egen temp1 = count(vi_code) if tot_p>=300, by(st_code dt_code)
egen temp2 = mode(temp1), by(st_code dt_code)
gen RGGVY_share_villages_300 = (achiev_UDE + achiev_ELEC) / temp2
egen temp3 = count(vi_code), by(st_code dt_code)
gen RGGVY_share_villages_all = (achiev_UDE + achiev_ELEC) / temp3
egen temp_tag = tag(st_code dt_code) 
twoway scatter RGGVY_share_villages_300 RGGVY_share_villages_all if temp_tag
	// Missing compares greater than any number in Stata, so "share>1.4" is also true when the
	// share is missing. The !missing() guards keep districts without a computable share out of
	// the tabulation and leave the flag missing for them instead of marking them rule breakers.
tab state if temp_tag & RGGVY_share_villages_300>1.4 & !missing(RGGVY_share_villages_300) & RGGVY_share_villages_all!=. & corr_state==1
gen RGGVY_rule_breaker = RGGVY_share_villages_300>1.4 if !missing(RGGVY_share_villages_300)
twoway scatter RGGVY_share_villages_300 RGGVY_share_villages_all if temp_tag & RGGVY_rule_breaker==0
sum RGGVY_share_villages_all if temp_tag & RGGVY_rule_breaker==0, detail
sum RGGVY_share_villages_300 if temp_tag & RGGVY_rule_breaker==0, detail
// split on 60% of villages in district
drop temp*

	// Keep (roughly) RD sample
gen in_fs_sample = vplan4<11 & corr_state==1 & sample==1 & sample_h==1 & sing_h==1 & pop_non_zero==1 
keep if in_fs_sample==1
keep if inrange(tot_p,150,450)

  // Histogram of district-level RGGVY treatment intensity, one observation per RD-sample village
twoway	(hist RGGVY_share_villages_all, freq fc(navy) lc(black) lw(thin) w(0.05)) ///
		, ///
		ytitle("Villages in RD sample", size(medsmall))  xtitle("Share of district's villages treated by RGGVY", size(medsmall)) /// 
		/*title("Indian Villages by 2001 Population", color(black) size(medlarge))*/ ///
		graphregion(lcolor(white)) graphregion(color(white)) plotregion(fcolor(white)) ///
		xlabel(0(0.2)1.2, labsize(medsmall)) ylabel(, nogrid angle(0) labsize(medsmall)) ///
		legend(off) aspect(0.35)
graph export "$texfig/figure_hist_rggvy_intensity.pdf", replace 


  // LaTeX wrapper for the exported histogram
cap file close tablewrite
file open tablewrite using "$texfig/figure_hist_rggvy_intensity.tex", write text replace
 
file write tablewrite "\begin{figure}[h!]\centering" _n
file write tablewrite "\caption{RGGVY district-wide treatment intensity}" _n
file write tablewrite "\label{fig:hist_rggvy_intensity}" _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "{\includegraphics[width=0.67\textwidth, trim={0 25mm 0 23mm}, clip]{${texfig_short}/figure_hist_rggvy_intensity.pdf}} " _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "\caption*{\scriptsize Note. --- " _n
file write tablewrite "This histogram summarizes RGGVY district-level treatment intensity for the 130 10th-Plan districts in our main RD sample. " _n
// grammar fix in the emitted note: "treated by RGGVY each district" -> "treated by RGGVY in each district"
file write tablewrite "As a proxy for treatment intensity, we divide the count of villages treated by RGGVY in each district " _n
file write tablewrite "(per program administrative data) by the total number of villages in that district. " _n
file write tablewrite "Below, we isolate the 90 districts on the right end of this distribution, " _n
file write tablewrite "for which RGGVY treated at least 60\% of their constituent villages. " _n
file write tablewrite "Our split sample analysis omits 5 districts for which RGGVY administrative " _n
file write tablewrite "data report an implausibly large number of treated villages. " _n
file write tablewrite "}" _n
file write tablewrite "\end{figure}" _n

file close tablewrite

}

****************************************************************** 
****************************************************************** 

** Figure A8: Histogram of districts rural power supply (and accompanying scatter plot for R1 response)
{
	// start with full dataset
use "$panel/panel_dataset_full.dta", clear

	// assign hours of power at the district level:
	// average over villages with positive, non-missing 2011 hours, then spread that value to every village in the district
	// NOTE(review): mean() names vdp_pwr_h_all_avg / vdp_pwr_h_dom_avg without the _11 suffix used in the if-condition;
	// presumably these resolve to the _11 variables by abbreviation -- confirm against the dataset
egen temp1 = mean(vdp_pwr_h_all_avg) if vdp_pwr_h_all_avg_11>0 & vdp_pwr_h_all_avg_11!=., by(st_code dt_code)
egen HRS_all_wide = mode(temp1), by(st_code dt_code)
egen temp2 = mean(vdp_pwr_h_dom_avg) if vdp_pwr_h_dom_avg_11>0 & vdp_pwr_h_dom_avg_11!=., by(st_code dt_code)
egen HRS_dom_wide = mode(temp2), by(st_code dt_code)
	// 0/1 indicators for at least 10 hours, missing when the district average is missing
	// (these indicators are not kept by the collapse below)
gen HRS_all = HRS_all_wide>=10 & HRS_all_wide!=.
gen HRS_dom = HRS_dom_wide>=10 & HRS_dom_wide!=.
replace HRS_all = . if HRS_all_wide==.
replace HRS_dom = . if HRS_dom_wide==.

	// isolate RD sample
keep if vplan4<11 & corr_state==1 & sample==1 & sample_h==1 & sing_h==1 & pop_non_zero==1 
keep if inrange(tot_p,150,450)


	// hard-code power deficits by state
	// based on state-level power deficits as reported in the Load Generation Balance Report: 2011-12 
	// http://www.cea.nic.in/reports/annual/lgbr/lgbr-2011.pdf
	// http://large.stanford.edu/courses/2012/ph241/bordia1/docs/lgbr_report.pdf
tab state
gen def = .
replace def = 17.3 if state=="CHHATTISGARH"
replace def = 4.8 if state=="KARNATAKA"
replace def = 0.0 if state=="WEST BENGAL"
replace def = -1.6 if state=="GUJARAT"
replace def = -6.0 if state=="HARYANA"
replace def = -7.0 if state=="RAJASTHAN"
replace def = -11.0 if state=="JHARKHAND"	
replace def = -12.1 if state=="ANDHRA PRADESH"
replace def = -15.4 if state=="ORISSA"
replace def = -18.2 if state=="BIHAR"
replace def = -18.9 if state=="MAHARASHTRA"
replace def = -19.4 if state=="MADHYA PRADESH"
	// every state remaining in the sample must have received a value above
assert def!=.

	// collapse to district level; pca01_id becomes the number of RD-sample villages per district
collapse (count) pca01_id, by(st_code dt_code stdt state district HRS_all_wide HRS_dom_wide def) fast

  // Histogram (for appendix)
  // The plotted variable is named in full. The original wrote HRS_all, which no longer exists
  // after the collapse and only resolved to HRS_all_wide through variable abbreviation.
twoway	///
	(hist HRS_all_wide [fw=pca01_id],  freq fcolor(navy) lcolor(black) start(0.5) width(1)) ///
	, ///
	ytitle("Number of villages", size(medsmall))  ///
	xtitle("Rural electricity supply by district (2011 hours/day)", size(medsmall)) /// 
	/*title("Indian Villages by 2001 Population", color(black) size(medlarge))*/ ///
	graphregion(lcolor(white)) graphregion(color(white)) plotregion(fcolor(white)) ///
	xlabel(0(3)24, labsize(medsmall)) ylabel(, nogrid angle(0) labsize(medsmall)) ///
	legend(off) aspect(0.5)
graph export "$texfig/figure_hist_hours.pdf", replace 
		

  // LaTeX wrapper for the exported histogram
cap file close tablewrite
file open tablewrite using "$texfig/figure_hist_hours.tex", write text replace
 
file write tablewrite "\begin{figure}[h!]\centering" _n
file write tablewrite "\caption{Rural power supply by district (2011, hours per day)}" _n
file write tablewrite "\label{fig:hist_hours}" _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "{\includegraphics[width=0.69\textwidth, trim={0 15mm 0 14mm}, clip]{${texfig_short}/figure_hist_hours.pdf}} " _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "\caption*{\scriptsize Note. --- " _n
file write tablewrite "This histogram summarizes the quality of rural power supply for districts in our main RD sample. " _n
file write tablewrite "We calculate this variable by averaging hours of all-sector power (from the 2011 Census, at the village level) " _n
file write tablewrite "over all electrified villages in each district. " _n
file write tablewrite "Below, we split on districts with at least 10 hours per day of power to the average electrified village. " _n
file write tablewrite "This histogram weights districts by the number of villages in our main RD sample. " _n
file write tablewrite "}" _n
file write tablewrite "\end{figure}" _n

file close tablewrite

}

****************************************************************** 
****************************************************************** 

** Table C1: RGGVY data irregularities
{
	// Load the Covered village file and suffix its village-level variables with _cov
use "$rggvy/rggvy_covered_master.dta", clear
foreach v of varlist v_id coverage_cat-status_franchisee {
  rename `v' `v'_cov
}
keep *code* *_cov state district block village

	// Merge the Completed village file on every *code* variable (creates _merge),
	// then suffix the incoming variables with _com
merge m:m *code* using "$rggvy/rggvy_completed_master.dta"

foreach v of varlist v_id coverage_cat-status_franchisee {
  rename `v' `v'_com
}

	// Rebuild transformer (dts) and transmission totals as row sums;
	// with the missing option a row total is missing when every component is missing
drop vi_link-rowname *_ind
drop comp_dts_tot_*
egen comp_dts_tot_single_cov = rowtotal(comp_dts_*_single_cov), missing
egen comp_dts_tot_three_cov = rowtotal(comp_dts_*_three_cov), missing
egen comp_dts_tot_single_com = rowtotal(comp_dts_*_single_com), missing
egen comp_dts_tot_three_com = rowtotal(comp_dts_*_three_com), missing
egen comp_trans_cov = rowtotal(comp_11kv_single_cov-comp_abc_three_cov), missing
egen comp_trans_com = rowtotal(comp_11kv_single_com-comp_abc_three_com), missing

	// Attach the Plan (10 or 11) of each DPR; nogen leaves _merge from the
	// Covered/Completed merge above in place
destring dpr_code, replace
merge m:m dpr_code using "$rggvy/rggvy_district_progress_X_XI_processed.dta", keepusing(plan) nogen

  // Mismatch completed to covered (matched only)
	// mismatch = 1 when any comp_* outcome differs between the two files
gen mismatch = 0 if _merge==3
foreach v_cov of varlist comp_*_cov {
  local v_com = substr("`v_cov'",1,length("`v_cov'")-3) + "com"
  replace mismatch = 1 if `v_cov' != `v_com' & _merge==3
}
sum mismatch
local mism = string(r(mean)*100,"%9.1f")  // 26.8%
	// `mismb' fills the "Percent of 10th-Plan Villages" column of the table, so it
	// must condition on plan==10 like every other *b local (previously plan==11,
	// which gave 18.6%; re-run to obtain the 10th-Plan value)
sum mismatch if plan==10
local mismb = string(r(mean)*100,"%9.1f")

  // Either completed or covered missing (all)
	// Reusable condition fragments: "no transformer data" / "no transmission data"
	// in each file, and which of the two files a village appears in
local no_dts_cov   "(comp_dts_tot_single_cov==. & comp_dts_tot_three_cov==.)"
local no_dts_com   "(comp_dts_tot_single_com==. & comp_dts_tot_three_com==.)"
local in_both      "(v_id_cov!=. & v_id_com!=.)"
local cov_only     "(v_id_cov!=. & v_id_com==.)"
local com_only     "(v_id_com!=. & v_id_cov==.)"

	// missing_1 counts outcome groups (connections, transformers, transmission)
	// that are missing in at least one file the village appears in
capture drop missing_1
gen missing_1 = 0
foreach cv of varlist comp_rhh_cov comp_bpl_cov comp_tot_habit_cov {
	local stem = substr("`cv'", 1, length("`cv'") - 3)
	local cm "`stem'com"
	replace missing_1 = 1 if (mi(`cv') & v_id_cov!=.) | (mi(`cm') & v_id_com!=.)
}
replace missing_1 = missing_1+1 if (`no_dts_cov' & v_id_cov!=.) | (`no_dts_com' & v_id_com!=.)
replace missing_1 = missing_1+1 if (comp_trans_cov==. & v_id_cov!=.) | (comp_trans_com==. & v_id_com!=.)

count if missing_1!=. & missing_1>0
local miss_1 = string(r(N)*100/_N,"%9.1f") // 77.9%
di `miss_1'
count if plan==10
local N10 = r(N)
count if missing_1!=. & missing_1>0 & plan==10
local miss_1b = string(r(N)*100/`N10',"%9.1f") // 65.3%
di `miss_1b'

  // Both completed and covered missing (matched)
	// missing_2 starts at 0 for matched villages only; an outcome group counts as
	// missing when it is missing in every file in which the village appears
capture drop missing_2
gen missing_2 = 0 if _merge==3
foreach cv of varlist comp_rhh_cov comp_bpl_cov comp_tot_habit_cov {
	local stem = substr("`cv'", 1, length("`cv'") - 3)
	local cm "`stem'com"
	replace missing_2 = 1 if (mi(`cv') & mi(`cm') & `in_both') ///
	                       | (mi(`cv') & `cov_only') ///
	                       | (mi(`cm') & `com_only')
}
replace missing_2 = missing_2+1 if (`no_dts_cov' & `no_dts_com' & `in_both') ///
                                 | (`no_dts_cov' & `cov_only') ///
                                 | (`no_dts_com' & `com_only')
replace missing_2 = missing_2+1 if (comp_trans_cov==. & comp_trans_com==. & `in_both') ///
                                 | (comp_trans_cov==. & `cov_only') ///
                                 | (comp_trans_com==. & `com_only')

	// Any group missing (miss_2) and all three groups missing (miss_3), full sample
count if missing_2!=. & missing_2>0
local miss_2 = string(r(N)*100/_N,"%9.1f") // 74.4%
di `miss_2'
count if missing_2==3
local miss_3 = string(r(N)*100/_N,"%9.1f") // 33.4%
di `miss_3'

	// Same two shares among plan==10 observations
count if plan==10
local N10 = r(N)
count if missing_2!=. & missing_2>0 & plan==10
local miss_2b = string(r(N)*100/`N10',"%9.1f") // 59.9%
di `miss_2b'
count if missing_2==3 & plan==10
local miss_3b = string(r(N)*100/`N10',"%9.1f") // 22.3%
di `miss_3b'



  // Villages with status completed, but no nonzero outcomes
	// Start at 1 for villages flagged work-completed in either file, then reset to 0
	// as soon as any outcome is strictly positive in a file the village appears in
gen comp_zeros = 1 if status_work_completed_cov==1 | status_work_completed_com==1
foreach cv of varlist comp_rhh_cov-comp_tot_dalit_cov {
	local stem = substr("`cv'", 1, length("`cv'") - 3)
	local cm "`stem'com"
	replace comp_zeros = 0 if !mi(`cv') & `cv'>0 & v_id_cov!=. & comp_zeros==1
	replace comp_zeros = 0 if !mi(`cm') & `cm'>0 & v_id_com!=. & comp_zeros==1
}
	// Transformer totals (single- or three-phase), Covered file first, then Completed
foreach src in cov com {
	replace comp_zeros = 0 if ((comp_dts_tot_single_`src'!=. & comp_dts_tot_single_`src'>0) ///
	                         | (comp_dts_tot_three_`src'!=. & comp_dts_tot_three_`src'>0)) ///
	                         & v_id_`src'!=. & comp_zeros==1
}
	// Transmission totals, same order
foreach src in cov com {
	replace comp_zeros = 0 if comp_trans_`src'!=. & comp_trans_`src'>0 & v_id_`src'!=. & comp_zeros==1
}
sum comp_zeros // 4.3%

  // Completed villages with status not energised
	// Share not energised = 1 - mean of the energised indicator
sum status_energised_com
local not_en = string((1-r(mean))*100,"%9.1f") // 24.4%
sum status_energised_com if plan==10
local not_enb = string((1-r(mean))*100,"%9.1f") // 14.1%

  // Covered villages listed as completed but not in completed dataset
	// _merge==1 rows exist only in the Covered file
sum status_*_cov if _merge==3
sum status_*_cov if _merge==1 // 0.9%

 
  // Number of habitations covered greater than count from habitation list
	// NOTE(review): after `drop vi_code', the `rename vi_code vi_code' line can only
	// succeed through variable-name abbreviation (i.e. exactly one remaining variable
	// whose name begins with vi_code) -- presumably a string version of the village
	// code, since it is destring'ed further below; confirm against the data.
drop vi_code
rename vi_code vi_code
merge m:m st_code dt_code vi_code	using "$loh/rggvy_list_of_habs_names.dta", gen(merge_lh)
duplicates r v_id_c*
	// One row per village id pair: sum proposed/completed counts, keep max of status flags
collapse (sum) prop_*_co? comp_*_co? lh_hab_count (max) status_*_co? merge_lh, by(v_id_c* st_code dt_code bk_code vi_code plan) fast

	// hab_irreg_1: habitations reported by RGGVY (either file) exceed the list count.
	// Defined only for villages matched to the list with a nonmissing RGGVY count.
gen hab_irreg_1 = 0 if merge_lh==3 & lh_hab_count!=. & (comp_tot_habit_com!=. | comp_tot_habit_cov!=.)  
sum lh_hab_count if hab_irreg_1==0, detail
sum comp_tot_habit_com if hab_irreg_1==0, detail
sum comp_tot_habit_cov if hab_irreg_1==0, detail
replace hab_irreg_1 = 1 if hab_irreg_1==0 & comp_tot_habit_com>lh_hab_count & comp_tot_habit_com!=.
replace hab_irreg_1 = 1 if hab_irreg_1==0 & comp_tot_habit_cov>lh_hab_count & comp_tot_habit_cov!=.
sum hab_irreg_1 if (comp_tot_habit_com>0 | comp_tot_habit_cov>0)
sum hab_irreg_1 // 5.3 %
	
	// Number of habitations covered does not match habitation list
	// The replaces are guarded by hab_irreg_2 itself. They were previously guarded
	// by hab_irreg_1==0, which left villages whose RGGVY count EXCEEDS the list
	// count coded as "matching" (the old 36.9% therefore excluded them; re-run).
gen hab_irreg_2 = 0 if merge_lh==3 & lh_hab_count!=. & (comp_tot_habit_com!=. | comp_tot_habit_cov!=.) 
replace hab_irreg_2 = 1 if hab_irreg_2==0 & comp_tot_habit_com!=lh_hab_count & comp_tot_habit_com!=.
replace hab_irreg_2 = 1 if hab_irreg_2==0 & comp_tot_habit_cov!=lh_hab_count & comp_tot_habit_cov!=.
 
sum hab_irreg_2 if (comp_tot_habit_com>0 | comp_tot_habit_cov>0)
sum hab_irreg_2

	// Number of habitations covered is greater than number in habitation census
	// Merge the habitation-census match file and collapse back to one row per village id pair.
destring vi_code, replace
merge m:m st_code dt_code vi_code using "$panel/pca_2001_names_hab_merge_final.dta", gen(merge_hab)
collapse (sum) prop_*_co? comp_*_co? lh_hab_count (max) status_*_co? merge_lh count_h3 count_h9 merge_hab (min) vplan*, by(v_id_c* st_code dt_code bk_code vi_code plan)
	// Defined for matched villages with at least one census habitation count and a
	// strictly positive RGGVY habitation count in either file.
gen hab_irreg_3 = 0 if merge_hab==3 & (count_h3!=. | count_h9!=.) & ((comp_tot_habit_com!=. ///
                     & comp_tot_habit_com>0) | (comp_tot_habit_cov!=. & comp_tot_habit_cov>0))
	// Flag only when the RGGVY count exceeds BOTH census counts.
	// NOTE(review): a missing count_h3/count_h9 compares as larger than any number, so
	// villages with only one census count can never be flagged -- confirm this is intended.
replace hab_irreg_3 = 1 if hab_irreg_3==0 & comp_tot_habit_com>count_h3 & comp_tot_habit_com>count_h9 ///
                     & comp_tot_habit_com!=.
replace hab_irreg_3 = 1 if hab_irreg_3==0 & comp_tot_habit_cov>count_h3 & comp_tot_habit_cov>count_h9 ///
                     & comp_tot_habit_cov!=.
sum hab_irreg_3	
local hab_irreg = string(r(mean)*100,"%9.1f") // 32.2%
sum hab_irreg_3 if plan==10	
local hab_irregb = string(r(mean)*100,"%9.1f") // 32.2% (NOTE(review): same value as the line above -- confirm it is the plan==10 figure)

	// Number of households connected is greater than number in Census
	// NOTE(review): the second keepusing() lists no_hh, yet the collapse below needs no_hh11;
	// presumably no_hh abbreviates no_hh11 in the 2011 file -- confirm.
merge m:m st_code dt_code vi_code using "$pca/pca_census01_names.dta", gen(merge_pca01) keepusing(pca01_id no_hh tot_p)
merge m:m st_code dt_code vi_code using "$pca/pca_census11_names.dta", gen(merge_pca11) keepusing(pca11_id no_hh)
	// plan is neither collapsed nor a by-variable here, so it is not available after this point.
collapse (sum) prop_*_co? comp_*_co? no_hh11 (max) status_*_co? merge_lh count_h3 count_h9 merge_hab merge_pca01 merge_pca11, ///
         by(v_id_c* st_code dt_code bk_code vi_code pca01_id no_hh tot_p vplan*) fast
	// Benchmark household count: the larger of no_hh and no_hh11 (rowmax ignores missings).
egen hh_census = rowmax(no_hh no_hh11)

	// Flag villages where any completed household-connection count exceeds the benchmark.
gen hh_irreg = 0 if hh_census!=. 
foreach v of varlist comp_rhh_cov-comp_st_hh_cov comp_rhh_com-comp_st_hh_com {
  replace hh_irreg = 1 if hh_irreg==0 & `v'>hh_census & `v'!=.
}
sum hh_irreg	// 2.7%

	// Number of households proposed is greater than number in Census
	// Compares against no_hh only (not the hh_census maximum used above).
gen hh_irreg_prop = 0 if no_hh!=. 
foreach v of varlist prop_rhh* prop_bpl* {
  replace hh_irreg_prop = 1 if hh_irreg_prop==0 & `v'>no_hh & `v'!=.
}
sum hh_irreg_prop	// 4.2%

	// MAKE TABLE
	// Emits table_rggvy_irreg.tex: one row per irregularity type, with the share of
	// all villages and of 10th-Plan villages taken from the locals computed above
capture file close tablewrite
file open tablewrite using "$textab/table_rggvy_irreg.tex", write text replace

	// Header
file write tablewrite ///
	"\begin{table}[ht]\centering" _n ///
	"\caption{RGGVY microdata irregularities \label{tab:rggvy_irreg}}" _n ///
	"\vspace{-2mm}" _n ///
	"\small" _n ///
	"\begin{tabular}{lcc}" _n ///
	"\hline" _n ///
	"\multicolumn{1}{c}{Type of Irregularity}& " _n ///
	"\multicolumn{1}{c}{$\begin{matrix}\text{Percent of}\\ \text{Villages}\end{matrix}$} & " _n ///
	"\multicolumn{1}{c}{$\begin{matrix}\text{Percent of}\\ \text{10th-Plan Villages}\end{matrix}$} \\" _n ///
	"[0.1em]" _n ///
	"\hline" _n ///
	"\\ " _n ///
	"[-0.5em]" _n

	// Body rows
file write tablewrite ///
	"RGGVY outcomes disagree across Covered and Completed datasets & `mism' & `mismb' \\" _n ///
	"Outcomes missing from either Covered or Completed dataset & `miss_1' & `miss_1b' \\" _n ///
	"Outcomes missing from both Covered and Completed datasets & `miss_2' & `miss_2b' \\" _n ///
	"All outcomes missing from both Covered and Completed datasets & `miss_3' & `miss_3b' \\" _n ///
	"Completed dataset reports status not energised & `not_en' & `not_enb' \\" _n ///
	"RGGVY covers more habitations than exist in village & `hab_irreg' & `hab_irregb' \\" _n ///
	"[0.25em]\hline" _n ///
	"\end{tabular}" _n

	// Table note
file write tablewrite ///
	"\captionsetup{width=\textwidth}" _n ///
	"\vspace{-2mm}" _n ///
	"\caption*{\scriptsize Note. ---  " _n ///
	"This table shows data irregularities across the RGGVY Covered and Completed  " _n ///
	"village datasets, which we do not use in our analysis." _n ///
	"We report the percent of all villages and of  " _n ///
	"10th-Plan villages that satisfy each irregularity " _n ///
	"criterion, where the denominator excludes missing and unmatched villages.  " _n ///
	"Program outcomes considered in the first four " _n ///
	"rows include the count of household connections, aggregate transformer  " _n ///
	"capacity installed, and aggregate transmission capacity installed." _n ///
	"(The first three rows count villages where \emph{any} outcome disagrees  " _n ///
	"or is missing; the fourth row counts only villages for  " _n ///
	"which \emph{all} of these outcomes are missing.)" _n ///
	"}" _n ///
	"\end{table}" _n

file close tablewrite

}

****************************************************************** 
****************************************************************** 

** Table C2: RGGVY DPR implementation summary stats
{
	// DPR-level progress data; count distinct states, districts and DPRs within
	// each Plan x implementing-agency cell, and distinct states/districts per Plan
use "$rggvy/rggvy_district_progress_X_XI_processed.dta", clear
egen dt_group = group(st_code dt_code)
egen temp_group = group(plan implement_type)
unique st_code, by(temp_group) gen(uniq_st)
unique dt_group, by(temp_group) gen(uniq_dt)
unique dpr_code, by(temp_group) gen(uniq_dpr)
unique st_code if plan==10
local st_uniq_10 = r(unique)
unique st_code if plan==11
local st_uniq_11 = r(unique)
unique dt_group if plan==10
local dt_uniq_10 = r(unique)
unique dt_group if plan==11
local dt_uniq_11 = r(unique)

	// Correct award dates that precede the sanction date, and floor 11th-Plan award
	// dates at Stata date 17553 (max() also maps a missing award date to 17553)
count if award_date<sanction_date
replace award_date = sanction_date if award_date<sanction_date
replace award_date = max(award_date,17553) if plan==11

	// Collapse to one row per Plan x implementing-agency type
collapse (mean) state=uniq_st district=uniq_dt dpr=uniq_dpr ///
        (min) min_sdate=sanction_date (mean) mean_sdate=sanction_date (max) max_sdate=sanction_date ///
        (min) min_adate=award_date (mean) mean_adate=award_date (max) max_adate=award_date ///
				(sum) sanction_cost award_cost total_released coverage_UDE achiev_UDE coverage_ELEC ///
				achiev_ELEC coverage_BPL achiev_BPL, by(plan implement_type)
				
drop mean* coverage* //ach*
foreach v of varlist sanction_cost award_cost total_released {
	replace `v' = `v'*100000/1e9
}

	// Row order within Plan: CPSU (default 1), SDOP, SEB, DISCOM, COOP
gen sort = 1
replace sort = 2 if implement_type=="SDOP"
replace sort = 3 if implement_type=="SEB"
replace sort = 4 if implement_type=="DISCOM"
replace sort = 5 if implement_type=="COOP"

sort plan sort
drop sort

drop *sdate award_cost sanction_cost

	// Spell out agency types for the table
replace implement_type = "Public Sector Undertakings" if implement_typ=="CPSU"
replace implement_type = "State Departments of Power" if implement_typ=="SDOP"
replace implement_type = "State Electricity Boards" if implement_typ=="SEB"
replace implement_type = "Distribution Companies" if implement_typ=="DISCOM"
replace implement_type = "Rural Electricity Coops" if implement_typ=="COOP"

	// Award-date ranges as strings (month-year range and year-only range)
gen temp_y1 = year(min_adate)
gen temp_y2 = year(max_adate)
gen temp_m1 = string(month(min_adate))
gen temp_m2 = string(month(max_adate))
foreach v of varlist temp_m* {
  replace `v' = "Jan " if `v'=="1"
  replace `v' = "Feb " if `v'=="2"
  replace `v' = "Mar " if `v'=="3"
  replace `v' = "Apr " if `v'=="4"
  replace `v' = "May " if `v'=="5"
  replace `v' = "Jun " if `v'=="6"
  replace `v' = "Jul " if `v'=="7"
  replace `v' = "Aug " if `v'=="8"
  replace `v' = "Sep " if `v'=="9"
  replace `v' = "Oct " if `v'=="10"
  replace `v' = "Nov " if `v'=="11"
  replace `v' = "Dec " if `v'=="12"
}
gen date_table = temp_m1 + string(temp_y1) + " -- " + temp_m2 + string(temp_y2)
gen year_table = string(temp_y1) + "--" + string(temp_y2)
replace year_table = subinstr(year_table,"--20","--",1)

	// Append two "Total" rows. The hard-coded positions (obs 10 and 11, sort keys 4.5
	// and 9.5) assume the collapse produced 9 rows: 4 for the 10th Plan, 5 for the 11th.
gen sort = _n
set obs  11

	// 10th-Plan total row: uses the 10th-Plan state count (`st_uniq_10'); this row
	// previously reported the 11th-Plan count by mistake
replace plan = 10 if _n==10
replace implement_type = "\emph{Total}" if _n==10
replace state = `st_uniq_10' if _n==10
replace district = `dt_uniq_10' if _n==10
foreach v of varlist total_released achiev_UDE achiev_ELEC achiev_BPL {
  qui sum `v' if plan==10
  replace `v' = r(sum) if _n==10
}
replace sort = 4.5 if _n==10

	// 11th-Plan total row
replace plan = 11 if _n==11
replace implement_type = "\emph{Total}" if _n==11
replace state = `st_uniq_11' if _n==11
replace district = `dt_uniq_11' if _n==11
foreach v of varlist total_released achiev_UDE achiev_ELEC achiev_BPL {
  qui sum `v' if plan==11
  replace `v' = r(sum) if _n==11
}
replace sort = 9.5 if _n==11
sort sort
format %12.2f total_released 


	// MAKE TABLE
	// Emits table_summary_stats_rggvy.tex from the in-memory rows:
	// rows 1-5 form Panel A (10th Plan), rows 6-11 form Panel B (11th Plan)
cap file close tablewrite
file open tablewrite using "$textab/table_summary_stats_rggvy.tex", write text replace

file write tablewrite "\begin{table}[ht]\centering" _n
file write tablewrite "\caption{Summary statistics -- RGGVY implementation and scope \label{tab:summary_stats_rggvy}}" _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "\small" _n
file write tablewrite "\begin{tabular}{lrrrrrr}" _n
file write tablewrite "\hline" _n
file write tablewrite "\multicolumn{1}{c}{$\begin{matrix}\text{Type of}\\ \text{Implementing Agency}\end{matrix}$} " _n
file write tablewrite "&	States " _n
file write tablewrite "&	Districts	" _n
file write tablewrite "&	\multicolumn{1}{c}{$\begin{matrix}\text{Award}\\ \text{Dates} \end{matrix}$} " _n
file write tablewrite "&	$\begin{matrix}\text{Unelectrified}\\\text{Villages}\end{matrix}$	" _n
file write tablewrite "&	$\begin{matrix}\text{Electrified}\\\text{Villages}\end{matrix}$	" _n
file write tablewrite "&	$\begin{matrix}\text{BPL}\\\text{Connections}\end{matrix}$	\\ " _n
file write tablewrite "[0.1em]" _n
file write tablewrite "\hline" _n
file write tablewrite "\\ " _n
file write tablewrite "[-0.5em]" _n

	// Panel A: one line per data row; counts are formatted with thousands separators
file write tablewrite "\multicolumn{1}{l}{\textbf{A.} \underline{10th Plan}}\\" _n
forvalues r = 1/5 {
	local c1 = implement_type[`r']
	local c2 = state[`r']
	local c3 = district[`r']
	local c4 = year_table[`r']
	local c5: di %12.0fc achiev_UDE[`r']
	local c6: di %12.0fc achiev_ELEC[`r'] 
	local c7: di %12.0fc achiev_BPL[`r']
	file write tablewrite "~~~`c1' & `c2' & `c3' & `c4' & `c5' & `c6' & `c7' \\" _n
}
	// Panel B
file write tablewrite "[1em]\multicolumn{1}{l}{\textbf{B.} \underline{11th Plan}}\\" _n	
forvalues r = 6/11 {
	local c1 = implement_type[`r']
	local c2 = state[`r']
	local c3 = district[`r']
	local c4 = year_table[`r']
	local c5: di %12.0fc achiev_UDE[`r']
	local c6: di %12.0fc achiev_ELEC[`r'] 
	local c7: di %12.0fc achiev_BPL[`r']
	file write tablewrite "~~~`c1' & `c2' & `c3' & `c4' & `c5' & `c6' & `c7' \\" _n
}

file write tablewrite "[0.25em]\hline" _n
file write tablewrite "\end{tabular}" _n
file write tablewrite "\captionsetup{width=\textwidth}" _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "\caption*{\scriptsize Note. ---  This table summarizes RGGVY program outcomes at the DPR (district) level. " _n
file write tablewrite "Public sector undertakings include government-owned generating companies," _n
file write tablewrite "such as Power Grid Corporation of India and National Hydroelectric Power Corporation.  " _n
file write tablewrite "The right three columns show the number of previously unelectrified and previously " _n
file write tablewrite "electrified villages treated by the program, as well as the " _n
	// The previous line already ends in "the"; this one must not repeat it
file write tablewrite "number of below poverty line households that received electric connections. " _n
file write tablewrite "Villages classified as electrified had basic electricity infrastructure with at least 10\% of " _n
file write tablewrite "households electrified prior to RGGVY implementation. " _n
file write tablewrite "23 (of 27) states contain both 10th and 11th Plan districts, while 30 (of 530) individual " _n
file write tablewrite "districts were targeted under both Plans. For a few districts, we correct financial award dates reported to have " _n
file write tablewrite "occurred before their respective project sanction dates or before the official announcement of the program." _n
file write tablewrite "}" _n
file write tablewrite "\end{table}" _n

file close tablewrite

}

****************************************************************** 
****************************************************************** 

** Table C3: Shapefile area correlations by state
{
	// Village panel -> one row per state x 10th-Plan indicator, carrying the village
	// count and the state-level area correlation
use "$panel/panel_dataset_no_outcomes.dta", clear
tab state if rf_sample_150

gen plan10 = (vplan4 < 11)
collapse (count) pca01_id (mean) area_corr_01_st (max) corr_max=area_corr_01_st, ///
	by(state st_code plan10) fast

	// The correlation must be constant within a cell (mean equals max)
assert area_corr_01_st==corr_max
drop corr_max

	// Cells must be unique on (st_code, plan10)
unique st_code plan10
assert r(unique)==r(N)

	// Use the current spelling of the state name in the table
replace state = "ODISHA" if state=="ORISSA"

	// Denominators: all villages, and villages with plan10==1
sum pca01_id
local N = r(sum)
sum pca01_id if plan10==1
local N10 = r(sum)

	// State codes in the order the rows are printed
local state_list "20 19 10 24 6 29 27 28 8 21 23 22 5 9 2 18 1 12 17 15 13 11"

   // MAKE TABLE
	// Header and body rows of table_state_corr.tex
capture file close tablewrite
file open tablewrite using "$textab/table_state_corr.tex", write text replace
{
file write tablewrite ///
	"\begin{table}[ht!]\centering" _n ///
	"\caption{Correlation of shapefiles with village areas}" _n ///
	"\label{tab:state_corr}" _n ///
	"\vspace{-2mm}" _n ///
	"\small" _n ///
	"\begin{tabular}{lrrr}" _n ///
	"\hline" _n ///
	"\multicolumn{1}{c}{State} " _n ///
	"& \multicolumn{1}{c}{$ \begin{matrix} \text{Area} \\ \text{Correlation} \end{matrix} $} " _n ///
	"& \multicolumn{1}{c}{~~$ \begin{matrix} \text{Percent of} \\ \text{Total Villages} \end{matrix} $} " _n ///
	"& \multicolumn{1}{c}{~~$ \begin{matrix} \text{Percent of} \\ \text{10th-Plan Villages} \end{matrix} $} " _n ///
	" \\" _n ///
	"[0.25em]\hline" _n

	// One row per state: name, correlation, share of all villages, share of 10th-Plan villages
foreach code in `state_list' {
	// State name = first observation with this code, proper-cased, "&" spelled out
	preserve
	keep if st_code==`code'
	local label = subinstr(proper(state[1]),"&","and",1)
	restore

	sum area_corr_01_st if st_code==`code'
	local corr_txt = string(r(mean),"%9.3f")
	if "`corr_txt'"=="." {
		local corr_txt = "missing"
	}

	sum pca01_id if st_code==`code'
	local share_all = string(100*r(sum)/`N',"%9.1f")

	sum pca01_id if st_code==`code' & plan10==1
	local share_10 = string(100*r(sum)/`N10',"%9.1f")

	file write tablewrite "`label'" "& `corr_txt'" "& `share_all'" "& `share_10'" "\\" _n
	// Extra vertical space after the last state of each group
	if inlist(`code', 22, 1, 11) {
		file write tablewrite "[1em]" _n
	}
}

	// Summary rows (correlation column left blank): above 0.35, below 0.35, missing
sum pca01_id if area_corr_01_st > 0.35 & area_corr_01_st!=.
local vil1 = string(100*r(sum)/`N',"%9.1f")
sum pca01_id if area_corr_01_st > 0.35 & area_corr_01_st!=. & plan10==1
local vil2 = string(100*r(sum)/`N10',"%9.1f")
file write tablewrite "States with correlation $> 0.35$ & " "& `vil1'" "& `vil2'" "\\" _n

sum pca01_id if area_corr_01_st < 0.35 & area_corr_01_st!=.
local vil1 = string(100*r(sum)/`N',"%9.1f")
sum pca01_id if area_corr_01_st < 0.35 & area_corr_01_st!=. & plan10==1
local vil2 = string(100*r(sum)/`N10',"%9.1f")
file write tablewrite "States with correlation $< 0.35$ & " "& `vil1'" "& `vil2'" "\\" _n

sum pca01_id if area_corr_01_st==.
local vil1 = string(100*r(sum)/`N',"%9.1f")
sum pca01_id if area_corr_01_st==. & plan10==1
local vil2 = string(100*r(sum)/`N10',"%9.1f")
file write tablewrite "States with missing shapefiles & " "& `vil1'" "& `vil2'" "\\" _n

file write tablewrite "[0.25em]\hline" _n

file write tablewrite "\end{tabular}" _n
file write tablewrite "\captionsetup{width=\textwidth}" _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "\caption*{\scriptsize Note. ---  This table reports the correlation between polygon areas " _n
file write tablewrite "(calculated from village shapefiles) and village areas reported in the Census's 2001 " _n
file write tablewrite "Village Directory. Our RD analysis includes the 12 states for which this correlation is " _n
file write tablewrite "at least 0.35. We omit the 5 states with shapefile areas that are uncorrelated with " _n
file write tablewrite "reported village areas, a sign of low quality shapefiles. " _n
file write tablewrite "The middle column reports the percent of Indian villages contained in each state; the right " _n
file write tablewrite "column use total villages in RGGVY 10th-Plan districts as a denominator. " _n
file write tablewrite "This table omits 3 states without any RGGVY 10th-Plan districts (Goa, Punjab, and Tamil Nadu). " _n
file write tablewrite "It also omits 2 states which were eligible under RGGVY's 10th Plan, " _n
file write tablewrite "but contain no single-habitation 10th-Plan villages in our RD bandwidth (Kerala and Tripura)." _n
file write tablewrite "}" _n
file write tablewrite "\end{table}" _n
}
file close tablewrite

}

****************************************************************** 
****************************************************************** 

** Figure C1: cookie-cuttering night lights 
{
	// Write the LaTeX wrapper for the pre-made nightlights image (no graph is drawn here)
cap file close tablewrite
file open tablewrite using "$texfig/figure_lights_zoomed.tex", write text replace
 
file write tablewrite "\begin{figure}[h!]\centering" _n
file write tablewrite "\caption{Example of nighttime brightness with village boundaries}" _n
file write tablewrite "\label{fig:lights_zoomed}" _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "{\includegraphics[width=0.5\textwidth]{${texfig_short}/nightlights_zoomed2_FINAL.pdf}} " _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "\caption*{\scriptsize Note. --- " _n
	// LaTeX opening quotes (two backticks) must be escaped as \`\` : inside a
	// double-quoted Stata string an unescaped ` starts a local-macro reference, so
	// the quoted words would be swallowed by macro expansion instead of written out
file write tablewrite "This image shows a close-up of an \`\`average visible lights'' image overlaid  " _n
file write tablewrite "with village boundaries, for an area in Rajasthan. " _n
	// \$ writes a literal dollar sign (LaTeX math delimiter)
file write tablewrite "The \$\approx 1\text{km}^2\$ pixels in this image range in brightness values from 3 to 38. " _n
file write tablewrite "We construct our \`\`raw'' nighttime brightness variable \$ L_v^t \$ by assigning each village \$ v \$ the value of its " _n
file write tablewrite "brightest pixel from the composite satellite image in each year \$ t \$. " _n
file write tablewrite "}" _n
file write tablewrite "\end{figure}" _n

file close tablewrite

}

****************************************************************** 
****************************************************************** 

** Table C4: Habitation merge counts
{
	// Village panel (selected variables only)
use pca01_id names_id tot_p state st_code h?v_id match_h? sing_h count_h? vplan4 c_code01 pop_mismatch20 ///
    fs_sample_150 rf_sample_150 corr_state using "$panel/panel_dataset_no_outcomes.dta", clear
unique pca01_id

	// For each matched file (suffix 3, then 9): merge the match diagnostics, classify every
	// village with match_h==1 by the step that matched it, then drop the diagnostics again.
	//   exact   : some name-match variable is filled, no fmatch and no MM flags
	//   rec     : not exact, fmatch filled, no MM flags
	//   mm      : neither of the above, both MM flags filled
	//   mystery : matched but none of the above
foreach yr in 3 9 {
	// Name-match variables are match_*_vi in the "3" file and match9_*_vi in the "9" file
	if `yr'==3 {
		local vi_pat "match_*_vi"
	}
	else {
		local vi_pat "match9_*_vi"
	}

	merge 1:m names_id using "$hmerge/pca_2001_names_merges_all_matched`yr'_processed.dta", keep(1 3) nogen ///
	    keepusing(`vi_pat' fmatch`yr' bk_MM_matches vi_MM_matches)
	unique pca01_id

	gen match_exact`yr' = 0
	foreach v of varlist `vi_pat' {
		replace match_exact`yr' = 1 if `v'!="" & fmatch`yr'=="" & bk_MM_matches=="" & vi_MM_matches=="" & match_h`yr'==1
	}
	gen match_rec`yr' = match_exact`yr'==0 & fmatch`yr'!="" & bk_MM_matches=="" & vi_MM_matches=="" & match_h`yr'==1
	gen match_mm`yr' = match_exact`yr'==0 & match_rec`yr'==0 & bk_MM_matches!="" & vi_MM_matches!="" & match_h`yr'==1
	gen match_mystery`yr' = match_exact`yr'==0 & match_rec`yr'==0 & match_mm`yr'==0 & match_h`yr'==1

	drop `vi_pat' fmatch`yr' bk_MM_matches vi_MM_matches
}

local N = _N
count if tot_p>=150 & tot_p<=450
local Nb = r(N)
local Nc = string(`N', "%9.0fc")
local Nbc = string(`Nb', "%9.0fc")

count if match_exact3==1 & match_exact9==1 
local tab_1_1 = string(r(N)/`N',"%9.3f")
count if match_exact3==1 & match_exact9==0
local tab_1_2 = string(r(N)/`N',"%9.3f")
count if match_exact3==0 & match_exact9==1
local tab_1_3 = string(r(N)/`N',"%9.3f")
count if match_exact3==0 & match_exact9==0
local tab_1_4 = string(r(N)/`N',"%9.3f")

count if match_exact3==1 & match_exact9==1 & tot_p>=150 & tot_p<=450
local tab_1_1b = string(r(N)/`Nb',"%9.3f")
count if match_exact3==1 & match_exact9==0 & tot_p>=150 & tot_p<=450
local tab_1_2b = string(r(N)/`Nb',"%9.3f")
count if match_exact3==0 & match_exact9==1 & tot_p>=150 & tot_p<=450
local tab_1_3b = string(r(N)/`Nb',"%9.3f")
count if match_exact3==0 & match_exact9==0 & tot_p>=150 & tot_p<=450
local tab_1_4b = string(r(N)/`Nb',"%9.3f")

count if (match_exact3==1 | match_rec3==1) & (match_exact9==1 | match_rec9==1)
local tab_2_1 = string(r(N)/`N',"%9.3f")
count if (match_exact3==1 | match_rec3==1) & (match_exact9==0 & match_rec9==0)
local tab_2_2 = string(r(N)/`N',"%9.3f")
count if (match_exact3==0 & match_rec3==0) & (match_exact9==1 | match_rec9==1)
local tab_2_3 = string(r(N)/`N',"%9.3f")
count if (match_exact3==0 & match_rec3==0) & (match_exact9==0 & match_rec9==0) 
local tab_2_4 = string(r(N)/`N',"%9.3f")

count if (match_exact3==1 | match_rec3==1) & (match_exact9==1 | match_rec9==1) & tot_p>=150 & tot_p<=450
local tab_2_1b = string(r(N)/`Nb',"%9.3f")
count if (match_exact3==1 | match_rec3==1) & (match_exact9==0 & match_rec9==0) & tot_p>=150 & tot_p<=450
local tab_2_2b = string(r(N)/`Nb',"%9.3f")
count if (match_exact3==0 & match_rec3==0) & (match_exact9==1 | match_rec9==1) & tot_p>=150 & tot_p<=450
local tab_2_3b = string(r(N)/`Nb',"%9.3f")
count if (match_exact3==0 & match_rec3==0) & (match_exact9==0 & match_rec9==0) & tot_p>=150 & tot_p<=450
local tab_2_4b = string(r(N)/`Nb',"%9.3f")

count if (match_h3==1) & (match_h9==1)
local tab_3_1 = string(r(N)/`N',"%9.3f")
count if (match_h3==1) & (match_h9==0)
local tab_3_2 = string(r(N)/`N',"%9.3f")
count if (match_h3==0) & (match_h9==1)
local tab_3_3 = string(r(N)/`N',"%9.3f")
count if (match_h3==0) & (match_h9==0)
local tab_3_4 = string(r(N)/`N',"%9.3f")

count if (match_h3==1) & (match_h9==1) & tot_p>=150 & tot_p<=450
local tab_3_1b = string(r(N)/`Nb',"%9.3f")
count if (match_h3==1) & (match_h9==0) & tot_p>=150 & tot_p<=450
local tab_3_2b = string(r(N)/`Nb',"%9.3f")
count if (match_h3==0) & (match_h9==1) & tot_p>=150 & tot_p<=450
local tab_3_3b = string(r(N)/`Nb',"%9.3f")
count if (match_h3==0) & (match_h9==0) & tot_p>=150 & tot_p<=450
local tab_3_4b = string(r(N)/`Nb',"%9.3f")

egen count_h39 = rowmean(count_h3 count_h9)
sum count_h39 if match_h3==1 & match_h9==1
local tab_5_1 = string(r(mean),"%9.3f")
sum count_h3 if match_h3==1 & match_h9==0
local tab_5_2 = string(r(mean),"%9.3f")
sum count_h9 if match_h3==0 & match_h9==1
local tab_5_3 = string(r(mean),"%9.3f")

sum count_h39 if match_h3==1 & match_h9==1 & tot_p>=150 & tot_p<=450
local tab_5_1b = string(r(mean),"%9.3f")
sum count_h3 if match_h3==1 & match_h9==0 & tot_p>=150 & tot_p<=450
local tab_5_2b = string(r(mean),"%9.3f")
sum count_h9 if match_h3==0 & match_h9==1 & tot_p>=150 & tot_p<=450
local tab_5_3b = string(r(mean),"%9.3f")

sum sing_h if match_h3==1 & match_h9==0
local tab_6_1 = string(r(mean),"%9.3f")
sum sing_h if match_h3==0 & match_h9==1
local tab_6_2 = string(r(mean),"%9.3f")
sum sing_h if match_h3==1 & match_h9==1
local tab_6_3 = string(r(mean),"%9.3f")

sum sing_h if match_h3==1 & match_h9==0 & tot_p>=150 & tot_p<=450 
local tab_6_1b = string(r(mean),"%9.3f")
sum sing_h if match_h3==0 & match_h9==1 & tot_p>=150 & tot_p<=450 
local tab_6_2b = string(r(mean),"%9.3f")
sum sing_h if match_h3==1 & match_h9==1 & tot_p>=150 & tot_p<=450 
local tab_6_3b = string(r(mean),"%9.3f")

sum pop_mismatch20 if match_h3==1 & match_h9==0
local tab_7_1 = string(r(mean),"%9.3f")
sum pop_mismatch20 if match_h3==0 & match_h9==1
local tab_7_2 = string(r(mean),"%9.3f")
sum pop_mismatch20 if match_h3==1 & match_h9==1
local tab_7_3 = string(r(mean),"%9.3f")

sum pop_mismatch20 if match_h3==1 & match_h9==0 & tot_p>=150 & tot_p<=450 
local tab_7_1b = string(r(mean),"%9.3f")
sum pop_mismatch20 if match_h3==0 & match_h9==1 & tot_p>=150 & tot_p<=450 
local tab_7_2b = string(r(mean),"%9.3f")
sum pop_mismatch20 if match_h3==1 & match_h9==1 & tot_p>=150 & tot_p<=450 
local tab_7_3b = string(r(mean),"%9.3f")


	// MAKE TABLE
	// Emits table_hab_merge.tex. The tab_5_4 / tab_6_4 / tab_7_4 locals (and their b
	// versions) are not defined anywhere visible above, so they expand to empty cells.
capture file close tablewrite
file open tablewrite using "$textab/table_hab_merge.tex", write text replace

	// Header
file write tablewrite ///
	"\begin{table}[h]\centering" _n ///
	"\caption{Summary of habitation census merge results \label{tab:hab_merge}}" _n ///
	"\vspace{-2mm}" _n ///
	"\small" _n ///
	"\begin{tabular}{lcccc}" _n ///
	"\hline" _n ///
	"Habitation census match &	2003 and 2009 & 2003 only	& 2009 only	& Unmatched \\" _n ///
	"[0.1em]" _n ///
	"\hline" _n ///
	"\\ " _n ///
	"[-0.7em]" _n

	// Panels A and B: cumulative match rates
file write tablewrite ///
	"\multicolumn{1}{l}{\textbf{A.} \underline{Match rates (all villages)}}\\" _n ///
	"~~~Exact matches            & `tab_1_1'  & `tab_1_2'  & `tab_1_3'  & `tab_1_4'  \\" _n ///
	"~~~~ + {\tt reclink}       & `tab_2_1'  & `tab_2_2'  & `tab_2_3'  & `tab_2_4'  \\" _n ///
	"~~~~ + {\tt Masala merge}  & `tab_3_1'  & `tab_3_2'  & `tab_3_3'  & `tab_3_4'  \\" _n ///
	"[0.8em]" _n ///
	"\multicolumn{1}{l}{\textbf{B.} \underline{Match rates (150--450 population)}}\\" _n ///
	"~~~Exact matches            & `tab_1_1b'  & `tab_1_2b'  & `tab_1_3b'  & `tab_1_4b'  \\" _n ///
	"~~~~ + {\tt reclink}       & `tab_2_1b'  & `tab_2_2b'  & `tab_2_3b'  & `tab_2_4b'  \\" _n ///
	"~~~~ + {\tt Masala merge}  & `tab_3_1b'  & `tab_3_2b'  & `tab_3_3b'  & `tab_3_4b'  \\" _n ///
	"[0.8em]" _n

	// Panels C and D: summary statistics
file write tablewrite ///
	"\multicolumn{1}{l}{\textbf{C.} \underline{Summary statistics (all villages)}}\\	" _n ///
	"~~~Average habitations per village    & `tab_5_1'  & `tab_5_2'  & `tab_5_3'  & `tab_5_4'  \\	" _n ///
	"~~~Share single-habitation villages & `tab_6_1'  & `tab_6_2'  & `tab_6_3'  & `tab_6_4'  \\" _n ///
	"~~~Share with population mismatch > 20\%   & `tab_7_1'  & `tab_7_2'  & `tab_7_3'  & `tab_7_4'  \\" _n ///
	"[0.8em]" _n ///
	"\multicolumn{1}{l}{\textbf{D.} \underline{Summary statistics (150--450 population)}}\\	" _n ///
	"~~~Average habitations per village    & `tab_5_1b'  & `tab_5_2b'  & `tab_5_3b'  & `tab_5_4b'  \\	" _n ///
	"~~~Share single-habitation villages & `tab_6_1b'  & `tab_6_2b'  & `tab_6_3b'  & `tab_6_4b'  \\" _n ///
	"~~~Share with population mismatch > 20\%   & `tab_7_1b'  & `tab_7_2b'  & `tab_7_3b'  & `tab_7_4b'  \\" _n ///
	"[0.25em]\hline" _n ///
	"\end{tabular}" _n

	// Table note
file write tablewrite ///
	"\captionsetup{width=\textwidth}" _n ///
	"\vspace{-2mm}" _n ///
	"\caption*{\scriptsize Note. ---  This table shows results from the habitation merge algorithm described above. " _n ///
	"Panels A and B report the share of villages that have merged after each step of the algorithm." _n ///
	"Panels C and D calculate summary statistics on the subset of Census panel villages that successfully merge to " _n ///
	"the habitation dataset. Panels A and C report match counts and summary statistics for" _n ///
	"all `Nc' villages, while Panels B and D report only the `Nbc' villages with 2001 populations between 150 and 450 " _n ///
	"(slightly wider than our optimal \texttt{rdrobust} bandwidths). Population mismatches occur " _n ///
	"when the sum of a village's constituent habitation populations " _n ///
	"deviates from both its 2001 and 2011 Census population by at least 20\%." _n ///
	"}" _n ///
	"\end{table}" _n

file close tablewrite

}

****************************************************************** 
****************************************************************** 

** Figures C2, C3, C4, C5: Histograms of habitation, SECC, EC, and DISE merges [and SHRUG, which we don't use since the merge is quite good]
{
	// Full village dataset; tot_p_top is 2001 population top-coded at 4000
	// (a missing tot_p also compares as >4000 and is set to 4000)
use "$panel/panel_dataset_no_lights.dta", clear
drop hpca*
gen tot_p_top = cond(tot_p>4000, 4000, tot_p)

	// Habitation merge indicator: matched in either habitation file
egen hab_merge = rowmax(match_h3 match_h9)

	// DISE merge indicator (merge_dise records master-only vs. matched)
merge 1:1 pca01_id using "$panel/pca_school_selected_WIDE_new.dta", ///
	keep(1 3) keepusing(pca01_id) gen(merge_dise)

	// SECC merge indicator: matched, with SECC population and household counts at most
	// 110% of the 2011 values; the _10 version also requires a household ratio of at least 10%
merge 1:m pca01_id using "$panel/secc_pca_vill_all.dta", ///
	keep(1 3) keepusing(pca01_id n_tot secc_n_hh)
duplicates drop pca01_id, force
gen p_ratio  = n_tot/tot_p11
gen hh_ratio = secc_n_hh/no_hh11
gen merge_secc    = (_merge==3 & p_ratio<=1.10 & hh_ratio<=1.10)
gen merge_secc_10 = (merge_secc==1 & hh_ratio>=0.10)
drop _merge

	// EC merge indicator: build a wide village-level EC file on the side, then merge it in
preserve
	use "$panel/ec_shrid_pc01_panel.dta", clear
	// Drop duplicated and flagged records
	tab count_dup_vill_ec, missing
	drop if count_dup_vill_ec==1
	tab count_dup_shrid, missing
	drop if count_dup_shrid!=0
	tab flag_ec
	drop if flag_ec==1
	// One row per village, firms/employees by EC year; keep only the 2005 and 2013 rounds
	keep pca01_id ec_*
	reshape wide ec_employees ec_firms, i(pca01_id) j(ec_year)
	drop *1990 *1998
	drop if ec_firms2005==. & ec_firms2013==. & ec_employees2005==. & ec_employees2013==.
	// Sanity checks: employees present whenever firms are; no zero firm counts
	assert ec_employees2005!=. if ec_firms2005!=.
	assert ec_employees2013!=. if ec_firms2013!=.
	assert ec_firms2005!=0 & ec_firms2013!=0
	tempfile ec_wide
	save `ec_wide'
restore
merge 1:1 pca01_id using `ec_wide', keep(1 3)
foreach yr in 2005 2013 {
	gen merge_ec`yr' = ec_firms`yr'!=.
}
gen any_work_ot = work_pooled_ot_p_11!=0

	// SHRUG merge indicator
merge 1:1 pca01_id using "$shrug/shrug_secc.dta", ///
	keep(1 3) keepusing(pca01_id) gen(merge_shrug)
		

		
	// Histograms of 2001 village population (top-coded), overlaid with the
	// subset of villages that merge to each auxiliary dataset.
	// Everything the five figures share is defined once here.

	//   Bar styles: frequencies in 100-person bins starting at 0.5
local hist_bins    freq start(0.5) width(100)
local bars_navy    `hist_bins' fcolor(navy) lcolor(black)
local bars_grey    `hist_bins' fcolor(white) lcolor(gs12) lw(medium)
local bars_hollow  `hist_bins' fcolor(none) lcolor(eltblue) lw(medium)
local bars_ltblue  `hist_bins' fcolor(eltblue) lcolor(navy) lw(medium)

	//   "Bandwidths" bracket drawn at y=47000 between x=150 and x=450
local bw_bracket ///
	(scatteri 47000 150 (12) "" 47000 320 (12) "Bandwidths", color(black) ///
		msymbol(p) connect(l) lwidth(medium) mlabcolor(black) lp(l)) ///
	(scatteri 47000 150 46200 150, color(black) msymbol(p) connect(l) lwidth(medium) lp(l)) ///
	(scatteri 47000 450 46200 450, color(black) msymbol(p) connect(l) lwidth(medium) lp(l)) ///
	(scatteri 47000 320 47000 450, color(black) msymbol(p) connect(l) lwidth(medium) lp(l))

	//   Axis titles, white background, and y-axis labelled in thousands
local axis_titles  xtitle("")  ytitle("Thousand villages")
local white_bg     graphregion(lcolor(white)) graphregion(color(white)) plotregion(fcolor(white))
local y_thousands  ylabel(,nogrid angle(0)) yla(0  10000 "10" 20000 "20" 30000 "30" 40000 "40" 50000 "50")

	// (Graph titles are deliberately omitted; captions come from the .tex wrappers)

	// Habitation matches
twoway 	(hist tot_p_top if pca01_id!=., `bars_navy') ///
		(hist tot_p_top if pca01_id!=. & hab_merge==1, `bars_hollow') ///
		(hist tot_p_top if pca01_id!=. & hab_merge==1 & pop_mismatch20==0, `bars_ltblue') ///
		`bw_bracket' ///
		, ///
		`axis_titles' ///
		`white_bg' ///
		legend(c(1) order(1 2 3) size(medsmall) pos(6) ///
			lab(1 "All villages") ///
			lab(2 "Villages matched with habitation census") ///
			lab(3 "Villages matched with habitation census, no population mismatch")) ///
		`y_thousands'
graph export "$texfig/figure_hab_merge_hist.pdf", replace 

	// SECC matches (the only figure with a fixed aspect ratio)
twoway 	(hist tot_p_top if pca01_id!=., `bars_navy') ///
		(hist tot_p_top if pca01_id!=. & merge_secc==1, `bars_hollow') ///
		`bw_bracket' ///
		, ///
		`axis_titles' ///
		`white_bg' ///
		legend(c(2) order(1 2 ) size(medsmall) pos(6) ///
			lab(1 "All villages     ") ///
			lab(2 "Villages matched with SECC dataset")) ///
		`y_thousands' aspect(.4)
graph export "$texfig/figure_secc_merge_hist.pdf", replace 

	// Schools (DISE) matches; navy bars are villages with vd_edu_d_11==1
twoway 	(hist tot_p_top if pca01_id!=., `bars_grey') ///
		(hist tot_p_top if pca01_id!=. & vd_edu_d_11==1, `bars_navy') ///
		(hist tot_p_top if pca01_id!=. & merge_dise==3, `bars_hollow') ///
		`bw_bracket' ///
		, ///
		`axis_titles' ///
		`white_bg' ///
		legend(c(1) order(1 2 3) size(medsmall) pos(6) ///
			lab(1 "All villages") ///
			lab(2 "Villages with schools in 2011") ///
			lab(3 "Villages matched with schools in DISE dataset")) ///
		`y_thousands'
graph export "$texfig/figure_schools_merge_hist.pdf", replace 

	// Economic Census matches; navy bars are villages with any_work_ot==1
twoway 	(hist tot_p_top if pca01_id!=., `bars_grey') ///
		(hist tot_p_top if pca01_id!=. & any_work_ot==1, `bars_navy') ///
		(hist tot_p_top if pca01_id!=. & merge_ec2013==1, `bars_hollow') ///
		(hist tot_p_top if pca01_id!=. & merge_ec2013==1 & merge_ec2005==1, `bars_ltblue') ///
		`bw_bracket' ///
		, ///
		`axis_titles' ///
		`white_bg' ///
		legend(c(1) order(1 2 3 4) size(medsmall) pos(6) ///
			lab(1 "All villages") ///
			lab(2 "Villages with any non-agricultural labor in 2011 Census") ///
			lab(3 "Villages matched with 2013 Economic Census") ///
			lab(4 "Villages matched with 2005 & 2013 Economic Census")) ///
		`y_thousands'
graph export "$texfig/figure_ec_merge_hist.pdf", replace 

	// SHRUG matches
twoway 	(hist tot_p_top if pca01_id!=., `bars_navy') ///
		(hist tot_p_top if pca01_id!=. & merge_shrug==3, `bars_hollow') ///
		`bw_bracket' ///
		, ///
		`axis_titles' ///
		`white_bg' ///
		legend(c(2) order(1 2 ) size(medsmall) pos(6) ///
			lab(1 "All villages     ") ///
			lab(2 "Villages matched with SHRUG dataset")) ///
		`y_thousands'
graph export "$texfig/figure_shrug_merge_hist.pdf", replace 

	
   // LaTeX wrapper for the habitation-merge histogram.
   // Both percentages quoted in the note use villages with 2001 population
   // in [150, 450] as the denominator.
local in_bw inrange(tot_p,150,450)

count if `in_bw'
local rN = r(N)
	// rA: share matched to the habitation census
count if `in_bw' & hab_merge==1
local rA = string(100*r(N)/`rN',"%9.1f")
	// rB: share matched and without a population mismatch
count if `in_bw' & hab_merge==1 & pop_mismatch20==0
local rB = string(100*r(N)/`rN',"%9.1f")

	// Close a possibly still-open handle before reopening it
cap file close tablewrite
file open tablewrite using "$texfig/figure_hab_merge_hist.tex", write text replace

	// Float header and graphic
file write tablewrite ///
	"\begin{figure}[h!]\centering" _n ///
	"\caption{Habitation merge results, by 2001 village population}" _n ///
	"\label{fig:hab_merge_hist}" _n ///
	"\vspace{-2mm}" _n ///
	"{\includegraphics[width=.63\textwidth, trim={0 4mm 0 0}, clip]{${texfig_short}/figure_hab_merge_hist.pdf}} " _n ///
	"\vspace{-2mm}" _n

	// Figure note
file write tablewrite ///
	"\caption*{\scriptsize Note. --- " _n ///
	"This figure shows a histogram of Indian villages by 2001 population (solid navy), " _n ///
	"and the subset of villages that we successfully match with the habitation census (hollow blue). " _n ///
	"Solid light blue bars show the subset of matched villages with population disparities of " _n ///
	"less than 20\%, which we include in our RD analysis. " _n ///
	"We match `rA'\% of villages with 2001 populations between 150 and 450. " _n ///
	"Excluding villages with population mismatches leaves us with " _n ///
	"`rB'\% of villages with 2001 populations between 150 and 450." _n
file write tablewrite "}" _n "\end{figure}" _n

file close tablewrite
	

   // LaTeX wrapper for the SECC-merge histogram.
   // rA/rB use all villages; rC/rD repeat the same two shares inside the
   // 150-450 (2001 population) window.

	// rA: share of all villages matched to SECC
	// rB: among those matches, share passing the 10%-of-households floor
count if merge_secc
local rA = string(100*r(N)/_N,"%9.1f")
local rN = r(N)
count if merge_secc_10
local rB = string(100*r(N)/`rN',"%9.1f")

	// Same pair, restricted to the population window
local in_bw inrange(tot_p,150,450)
count if `in_bw'
local rN = r(N)
count if merge_secc & `in_bw'
local rC = string(100*r(N)/`rN',"%9.1f")
	// the matched count just obtained becomes the denominator for rD
local rN = r(N)
count if merge_secc_10 & `in_bw'
local rD = string(100*r(N)/`rN',"%9.1f")

	// Close a possibly still-open handle before reopening it
cap file close tablewrite
file open tablewrite using "$texfig/figure_secc_merge_hist.tex", write text replace

	// Float header and graphic
file write tablewrite ///
	"\begin{figure}[h!]\centering" _n ///
	"\caption{SECC merge results, by 2001 village population}" _n ///
	"\label{fig:secc_merge_hist}" _n ///
	"\vspace{-2mm}" _n ///
	"{\includegraphics[width=.63\textwidth, trim={0 19mm 0 19mm}, clip]{${texfig_short}/figure_secc_merge_hist.pdf}} " _n ///
	"\vspace{-2mm}" _n

	// Figure note
file write tablewrite ///
	"\caption*{\scriptsize Note. --- " _n ///
	"This figure shows a histogram of Indian villages by 2001 population (solid navy), and the subset " _n ///
	"of villages that we successfully match with a village in the SECC dataset (hollow blue). " _n ///
	"Overall, we match `rA'\% of Census villages to the SECC dataset; for `rB'\% of " _n ///
	"these matches, at least 10\% of total households are included in our SECC dataset " _n ///
	"(because they have at least one poverty indicator).  We match `rC'\% " _n ///
	"of Census villages with 2001 populations between 150--450; for `rD'\% of " _n ///
	"these matches, our SECC dataset includes at least 10\% of total village households. " _n
file write tablewrite "}" _n "\end{figure}" _n

file close tablewrite
	
	
   // MAKE FIGURE: DISE merge
   // Writes the LaTeX wrapper for the schools (DISE) merge histogram.
   //   rA: DISE-matched villages as a percentage of villages with vd_edu_d_11==1
   //   rB: the same ratio within the 150-450 (2001 population) window
   // Fix relative to the earlier version: two typos in the emitted note
   // ("with schools 2011" -> "with schools in 2011"; "the the DISE" -> "the DISE").
   //
   // NOTE(review): the numerators count every village with merge_dise==3, while
   // the denominators are restricted to vd_edu_d_11==1. If some matched villages
   // have vd_edu_d_11!=1, the percentages are not strictly "shares of villages
   // with schools" as the note says. Left unchanged to keep reported numbers
   // stable -- confirm whether "& vd_edu_d_11==1" belongs in the numerators.
count if vd_edu_d_11==1
local rN = r(N)
count if merge_dise==3
local rA = string(100*r(N)/`rN',"%9.1f")

count if vd_edu_d_11==1 & inrange(tot_p,150,450)
local rN = r(N)
count if merge_dise==3 & inrange(tot_p,150,450)
local rB = string(100*r(N)/`rN',"%9.1f")

	// Close a possibly still-open handle before reopening it
cap file close tablewrite
file open tablewrite using "$texfig/figure_schools_merge_hist.tex", write text replace
 
file write tablewrite "\begin{figure}[h!]\centering" _n
file write tablewrite "\caption{DISE merge results, by 2001 village population}" _n
file write tablewrite "\label{fig:schools_merge_hist}" _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "{\includegraphics[width=.68\textwidth, trim={0 4mm 0 2mm}, clip]{${texfig_short}/figure_schools_merge_hist.pdf}} " _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "\caption*{\scriptsize Note. --- " _n
file write tablewrite "This figure shows a histogram of Indian villages by 2001 population (hollow white), " _n
file write tablewrite "the subset of villages with schools in 2011 (as reported by the 2011 Census; solid navy), " _n
file write tablewrite "and the subset of villages that we successfully match with a school in the DISE dataset (hollow blue). " _n
file write tablewrite "We match `rA'\% of villages with schools in 2011; for villages with 2001 populations" _n
file write tablewrite "between 150 and 450, we match `rB'\% of those with schools in 2011. " _n
file write tablewrite "}" _n
file write tablewrite "\end{figure}" _n

file close tablewrite	

   // MAKE FIGURE: Economic Census merge
   // Writes the LaTeX wrapper for the Economic Census merge histogram.
   // Denominator: villages with any_work_ot==1 and 2001 population in [150, 450].
   //   rA: percentage of those matched to the 2013 Economic Census
   //   rB: percentage of those matched to both the 2005 and 2013 rounds
   // Fix relative to the earlier version: the note printed rB without a
   // percent sign ("... and 12.3 to both ..."); "\%" is now written after it,
   // matching how rA is reported in the same sentence.
count if any_work_ot==1 & inrange(tot_p,150,450)
local rN = r(N)
count if  any_work_ot==1 & inrange(tot_p,150,450) & merge_ec2013==1
local rA = string(100*r(N)/`rN',"%9.1f")
count if  any_work_ot==1 & inrange(tot_p,150,450) & merge_ec2013==1 & merge_ec2005==1
local rB = string(100*r(N)/`rN',"%9.1f")

	// Close a possibly still-open handle before reopening it
cap file close tablewrite
file open tablewrite using "$texfig/figure_ec_merge_hist.tex", write text replace
 
file write tablewrite "\begin{figure}[h!]\centering" _n
file write tablewrite "\caption{Economic Census merge results, by 2001 village population}" _n
file write tablewrite "\label{fig:ec_merge_hist}" _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "{\includegraphics[width=.68\textwidth, trim={0 4mm 0 2mm}, clip]{${texfig_short}/figure_ec_merge_hist.pdf}} " _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "\caption*{\scriptsize Note. --- " _n
file write tablewrite "This figure shows a histogram of Indian villages by 2001 population (hollow white), " _n
file write tablewrite "the subset of villages with any non-agricultural labor in 2011 (solid navy), " _n
file write tablewrite "and the subset of villages that we successfully match to the Economic Census in " _n
file write tablewrite "2013 (hollow blue) or in 2005 and 2013 (solid light blue). " _n
file write tablewrite "Of villages with 2001 populations between 150 and 450 and non-zero non-agricultural labor, " _n
file write tablewrite "we match `rA'\% to the 2013 Economic Census and `rB'\% to both 2005 and 2013 years. " _n
file write tablewrite "}" _n
file write tablewrite "\end{figure}" _n

file close tablewrite	

}

****************************************************************** 
****************************************************************** 

** Table C5: Master dataset observation counts
{
// Row 2 of the table: size of the 2001-2011 Census panel (column 1), and the
// number of full-panel villages with vplan4<11 (column 2)
use pca01_id using "$panel/census_panel_2001_2011.dta", clear
local tab_2_1: di %12.0fc _N

use pca01_id c_code01 sing_h match_h3 match_h9 vplan4 state st_code ///
	corr_state tot_p* no_hh11 sample_h pop_non_zero pop_mismatch20 ///
	using "$panel/panel_dataset_full.dta", clear
tab state
	// States flagged as having unusable maps for the "+ maps" rows
gen bad_maps = inlist(state, ///
	"ASSAM","HIMACHAL PRADESH","JAMMU & KASHMIR","UTTAR PRADESH","UTTARAKHAND")
count if vplan4<11
local tab_2_2: di %12.0fc r(N)

	// Sample-restriction indicators (one per table column)
	//   habitation merge: row-wise maximum of the two match flags
egen hab_merge = rowmax(match_h3 match_h9)
	//   10th-Plan villages without a population mismatch
gen plan10 = pop_mismatch20==0 & vplan4<11
	//   single-habitation villages
gen hab1 = sing_h==1 & sample_h==1
	//   150-450 population window (inrange() is 0 for missing tot_p, as is
	//   the pair of inequalities it replaces)
gen popbw = inrange(tot_p,150,450) & pop_non_zero==1	

	// DISE merge indicator: any match to the schools file counts, regardless
	// of whether the two enrollment variables are missing
merge 1:1 pca01_id using "$panel/pca_school_selected_WIDE_new.dta", ///
	keep(1 3) keepusing(tot_enroll_all2011 tot_enroll_all2005) 
gen merge_dise = _merge==3
drop _merge

	// SECC merge indicator
	// The using file can hold several rows per pca01_id; one row per village is
	// kept after the merge (which one is not controlled: duplicates drop, force).
merge 1:m pca01_id using "$panel/secc_pca_vill_all.dta", ///
	keep(1 3) keepusing(pca01_id n_tot secc_n_hh)
duplicates drop pca01_id, force
gen p_ratio = n_tot/tot_p11
gen hh_ratio = secc_n_hh/no_hh11
	// SECC totals may be at most 110% of 2011 Census totals; missing ratios fail
gen merge_secc = _merge==3 & p_ratio<=1.10 & hh_ratio<=1.10
gen merge_secc_10 = merge_secc & hh_ratio>=0.10
drop _merge

	// EC merge indicator: wide Economic Census file built in a tempfile
preserve
use "$panel/ec_shrid_pc01_panel.dta", clear	
tab count_dup_vill_ec, missing
keep if count_dup_vill_ec!=1
tab count_dup_shrid, missing
keep if count_dup_shrid==0
tab flag_ec
keep if flag_ec!=1
keep pca01_id ec_*
reshape wide ec_employees ec_firms, i(pca01_id) j(ec_year)
drop *1990 *1998
drop if ec_firms2005==. & ec_firms2013==. & ec_employees2005==. & ec_employees2013==.
	// Sanity checks: employment present whenever firms are; no zero firm counts
foreach yr in 2005 2013 {
	assert ec_employees`yr'!=. if ec_firms`yr'!=.
}
assert ec_firms2005!=0 & ec_firms2013!=0
tempfile ec
save `ec'
restore
merge 1:1 pca01_id using `ec', keep(1 3) nogen
	// Here a village counts as merged only if it appears in both rounds
gen merge_ec = ec_firms2005!=. & ec_firms2013!=. 

	// SHRUG merge indicator
merge 1:1 pca01_id using "$shrug/shrug_secc.dta", ///
	keep(1 3) keepusing(pca01_id) gen(merge_shrug)
		

	// Village counts for table rows 3-8 and columns 1-4, stored as formatted
	// strings in locals tab_<row>_<col>.
	// Each cell is the number of villages satisfying (column condition) AND
	// (row condition); both sets of conditions are cumulative.

	// Column conditions: every column adds one restriction to the previous one
local col_if_1 hab_merge==1
local col_if_2 `col_if_1' & plan10==1
local col_if_3 `col_if_2' & hab1==1
local col_if_4 `col_if_3' & popbw==1

	// Row conditions, appended to the column condition
	//   row 3: habitations only (no extra restriction)
	//   row 4: + maps; rows 5-8: + maps + SHRUG / SECC / EC / DISE
local maps_ok & c_code01!="" & bad_maps==0
local row_if_3 
local row_if_4 `maps_ok'
local row_if_5 `maps_ok' & merge_shrug==3
local row_if_6 `maps_ok' & merge_secc==1
local row_if_7 `maps_ok' & merge_ec==1
local row_if_8 `maps_ok' & merge_dise==1

forvalues r = 3/8 {
	forvalues c = 1/4 {
		count if `col_if_`c'' `row_if_`r''
		local n_cell = r(N)
		local tab_`r'_`c': di %12.0fc `n_cell'
	}
}

	// Share of the final DISE cell (row 8, column 4 -- the last cell counted
	// above) that has nonmissing 2005 enrollment
local denom = `n_cell'
count if `col_if_4' `row_if_8' & tot_enroll_all2005!=.
local num = r(N)
local pct_sch_05 = string(100*`num'/`denom',"%9.0f")

	
	// MAKE TABLE
	// Writes table_village_counts.tex from the tab_<row>_<col> locals and
	// pct_sch_05, all of which must already be defined when this runs.
	// Fix relative to the earlier version: typo in the note
	// ("populations disparities" -> "population disparities").
	// NOTE(review): the note attributes the "<20% disparity" restriction to
	// rows 3--8 as a whole; confirm that this also holds for the "Total"
	// column, whose counts are computed upstream of this block.
cap file close tablewrite
file open tablewrite using "$textab/table_village_counts.tex", write text replace

	// Float header and column titles
file write tablewrite "\begin{table}[ht]\centering" _n
file write tablewrite "\caption{Count of villages by merged dataset \label{tab:village_counts}}" _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "\small" _n
file write tablewrite "\begin{tabular}{lrcrcrcr}" _n
file write tablewrite "\hline" _n
file write tablewrite "\multicolumn{1}{c}{Number of Villages} & " _n
file write tablewrite "\multicolumn{1}{c}{Total} &&" _n
file write tablewrite "\multicolumn{1}{c}{$\begin{matrix}\text{RGGVY} \\ \text{10th-Plan}\end{matrix}$} &&" _n
file write tablewrite "\multicolumn{1}{c}{$\begin{matrix}\text{RGGVY} \\ \text{10th-Plan}\\ \text{Single-Hab.}\end{matrix}$} &&" _n
file write tablewrite "\multicolumn{1}{c}{$\begin{matrix}\text{RGGVY} \\ \text{10th-Plan}\\ \text{Single-Hab.} \\ \text{150--450} \end{matrix}$} " _n
file write tablewrite "\\[0.1em] \hline" _n
file write tablewrite "\\[-0.5em]" _n

	// Table body: row 1 is a hard-coded figure; rows 2-8 come from the locals
file write tablewrite "Raw Census datasets (village-level)       & \multicolumn{2}{l}{593,000+} \\" _n
file write tablewrite "Census panel                              & `tab_2_1' && `tab_2_2' \\" _n
file write tablewrite "Census panel + habitations                & `tab_3_1' && `tab_3_2' && `tab_3_3' && `tab_3_4' \\" _n
file write tablewrite "Census panel + habitations + maps         & `tab_4_1' && `tab_4_2' && `tab_4_3' && `tab_4_4' \\" _n
file write tablewrite "Census panel + habitations + maps + SHRUG & `tab_5_1' && `tab_5_2' && `tab_5_3' && `tab_5_4' \\" _n
file write tablewrite "Census panel + habitations + maps + SECC  & `tab_6_1' && `tab_6_2' && `tab_6_3' && `tab_6_4' \\" _n
file write tablewrite "Census panel + habitations + maps + EC    & `tab_7_1' && `tab_7_2' && `tab_7_3' && `tab_7_4' \\" _n
file write tablewrite "Census panel + habitations + maps + DISE  & `tab_8_1' && `tab_8_2' && `tab_8_3' && `tab_8_4' \\" _n
file write tablewrite "[0.25em]\hline" _n
file write tablewrite "\end{tabular}" _n

	// Table note
file write tablewrite "\captionsetup{width=\textwidth}" _n
file write tablewrite "\vspace{-2mm}" _n
file write tablewrite "\caption*{\scriptsize Note. ---  " _n
file write tablewrite "All village counts exclude Goa and the 7 Union Territories, which were not covered under RGGVY. " _n
file write tablewrite "Rows 2--8 include villages we can link across 2001 and 2011 Census years. " _n
file write tablewrite "Rows 3--8 include villages we can match to the 2003 or 2009 census of habitations, " _n
file write tablewrite "with population disparities of less than 20\%. " _n
file write tablewrite "Rows 4--8 include villages in states with reliable village shapefiles " _n
file write tablewrite "(i.e. the top 12 rows of Table \ref{tab:hab_merge}), " _n
file write tablewrite "letting us use nighttime brightness as a pre-RGGVY control in our RD estimation. " _n
file write tablewrite "Row 5 includes villages we can match to the SHRUG dataset (with 2011 expenditure per capita). " _n
file write tablewrite "Row 6 includes villages we can match to our SECC dataset. " _n
file write tablewrite "Row 7 includes villages we can match to both the 2005 and 2013 Economic Census. " _n
file write tablewrite "Row 8 includes villages we can match to the DISE dataset, only `pct_sch_05'\% of which have  " _n
file write tablewrite "nonmissing 2005 school data (a key pre-RGGVY control). " _n
file write tablewrite "We conduct our RD analysis on villages in the right-most column, with optimal RD " _n
file write tablewrite "bandwidths less than 150 above/below the 300-person cutoff." _n
file write tablewrite "}" _n
file write tablewrite "\end{table}" _n

file close tablewrite

}

****************************************************************** 
****************************************************************** 

** Figure C6: NSS weights quintiles by population 2005-2010
{
// Load the uncollapsed NSS panel, reduce it to one row per (fsu, year), and
// draw one histogram of frame population per survey year.
// Fix relative to the earlier version: the 2010 histogram's y-axis was titled
// "Thousand villages" although the axis shows raw frequencies (format %9.0f,
// no relabelling); both years now use "Count of villages", and both figures
// are produced by the same loop so they cannot drift apart again.
use "$panel/panel_dataset_dd_nss_uncollapsed.dta", clear

	// Top-code frame population at 4000 for plotting (a missing
	// frame_population also satisfies ">4000" in Stata and is set to 4000)
gen frame_pop_top = frame_population
replace frame_pop_top = 4000 if frame_population>4000

	// Keep one observation per fsu-year, and verify that nothing was lost.
	// "unique" is not a built-in command -- presumably the user-written
	// package of that name, which must be installed.
egen temp = tag(fsu year)
unique fsu year
local uniq = r(unique)
keep if temp
unique fsu year
assert r(unique)==`uniq'
assert r(unique)==r(N)

	// Solid navy: weight deciles 1-2 (labelled quintile 1);
	// hollow blue: weight deciles 3-10 (labelled quintiles 2-5)
foreach y in 2005 2010 {
	twoway 	(hist frame_pop_top if inrange(WT_decile,1,2) & year==`y', freq fcolor(navy) lcolor(black) start(0.5) width(100)) ///
			(hist frame_pop_top if inrange(WT_decile,3,10) & year==`y', freq fcolor(none) lcolor(eltblue) start(0.5) width(100) lw(medium)) ///
			, ///
			xtitle("2001 village population", size(medlarge))  ytitle("Count of villages", size(medlarge))  /// 
			title("`y' NSS villages by 2001 population", size(large) color(black)) ///
			graphregion(lcolor(white)) graphregion(color(white)) plotregion(fcolor(white)) ///
			legend(c(2) order(1 2) size(medlarge) pos(6) ///
				lab(1 "NSS weight quintile 1      ") ///
				lab(2 "NSS weight quintiles 2-5")) ///
			xlabel(, labsize(medlarge)) ///
			ylabel(,nogrid angle(0) format(%9.0f) labsize(medlarge))			 
	graph export "$texfig/figure_nss_weights_hist_`y'.pdf", replace 
}


// Kernel densities of frame population by NSS weight group, one figure per
// year, with dots marking each weight quintile's median population.

	// Collapse weight deciles into quintiles and store each quintile's median
	// (computed on the full, untruncated frame_population)
gen Q = ceil(WT_decile/2)
forvalues q = 1/5 {
	foreach y in 2005 2010 {
		qui sum frame_population if year==`y' & Q==`q', detail
		local med_`q'_`y' = r(p50)	
	}
}

	// Year-specific label placement: clock positions of the "q2" and "q5"
	// labels and the x-coordinate of the "Medians by quintile" caption
local q2_clock_2005 7
local q5_clock_2005 5
local cap_x_2005    1800
local q2_clock_2010 6
local q5_clock_2010 6
local cap_x_2010    2000

	// Marker/label styling shared by all median dots
local dot_style msymbol(circle) mlabcolor(black) mlabsize(medium)

foreach y in 2005 2010 {
		// densities use only villages at or below the 4000 top-code
	local kd_sample year==`y' & frame_population<=4000
	twoway 	(kdensity frame_population if inrange(WT_decile,1,2) & `kd_sample', lcolor(navy) lw(medium) lp(solid)) ///
			(kdensity frame_population if inrange(WT_decile,3,10) & `kd_sample', lcolor(eltblue) lw(medium) lp(dash)) ///
			(scatteri 0.0005 `med_1_`y'' (6) "q1", mcolor(navy) `dot_style') ///
			(scatteri 0.0005 `med_2_`y'' (`q2_clock_`y'') "q2", mcolor(eltblue) `dot_style') ///
			(scatteri 0.0005 `med_3_`y'' (6) "q3", mcolor(eltblue) `dot_style') ///
			(scatteri 0.0005 `med_4_`y'' (6) "q4", mcolor(eltblue) `dot_style') ///
			(scatteri 0.0005 `med_5_`y'' (`q5_clock_`y'') "q5", mcolor(eltblue) `dot_style') ///
			(scatteri 0.000517 `cap_x_`y'' (12) "Medians by quintile", mcolor(none) mlabcolor(black) mlabsize(medium)) ///
			, ///
			xtitle("2001 village population", size(medlarge))  ytitle("Density", size(medlarge))  /// 
			title("`y' NSS villages by 2001 population", size(large) color(black)) ///
			graphregion(lcolor(white)) graphregion(color(white)) plotregion(fcolor(white)) ///
			legend(c(2) order(1 2) size(medlarge) pos(6) symxsize(6) ///
				lab(1 "NSS weight quintile 1      ") ///
				lab(2 "NSS weight quintiles 2-5")) ///
			xlabel(, labsize(medlarge)) ///
			ylabel(0 0.0002 0.0004 0.0006,nogrid angle(0) format(%9.4f) labsize(medlarge))		
	graph export "$texfig/figure_nss_weights_kdens_`y'.pdf", replace 
}


	// For each year, display the fraction of villages in the 4000 top-code bin
	// that fall outside weight deciles 1-2 (diagnostic only; nothing is saved).
	// The bin is frame_pop_top==4000, so it also contains villages with a
	// population of exactly 4000.
foreach y in 2010 2005 {
	count if year==`y' & frame_pop_top==4000
	local rN = r(N)
	count if year==`y' & frame_pop_top==4000 & !inlist(WT_decile,1,2)
	di r(N)/`rN'
}


   // LaTeX wrapper combining the four NSS-weight panels into one figure:
   // histograms on the left, kernel densities on the right; 2005 on top, 2010 below
	// Close a possibly still-open handle before reopening it
cap file close tablewrite
file open tablewrite using "$texfig/figure_nss_weights_dist.tex", write text replace

	// Float header and the 2x2 grid of graphics
file write tablewrite ///
	"\begin{figure}[h!]\centering" _n ///
	"\caption{Village population distributions by NSS weight quintile}" _n ///
	"\label{fig:nss_weights_dist}" _n ///
	"\vspace{-2mm}" _n ///
	"{\includegraphics[width=.495\textwidth, clip]{${texfig_short}/figure_nss_weights_hist_2005.pdf}} " _n ///
	"{\includegraphics[width=.495\textwidth, clip]{${texfig_short}/figure_nss_weights_kdens_2005.pdf}} \\ " _n ///
	"{\includegraphics[width=.495\textwidth, clip]{${texfig_short}/figure_nss_weights_hist_2010.pdf}} " _n ///
	"{\includegraphics[width=.495\textwidth, clip]{${texfig_short}/figure_nss_weights_kdens_2010.pdf}} \\ " _n ///
	"\vspace{-2mm}" _n

	// Figure note
file write tablewrite ///
	"\caption*{\scriptsize Note. --- " _n ///
	"The left panels display histograms of 2001 populations for villages in the 2005 and 2010 NSS sampling frames. " _n ///
	"Solid navy bars report counts of villages in the bottom quintile of NSS sampling weights, while " _n ///
	"hollow blue bars report counts of villages in the top four quintiles. " _n ///
	"Histograms are top-coded at 4000. " _n ///
	"The right panels report the corresponding kernel densities, removing top-coded villages. " _n ///
	"Dots report medians of the untruncated distributions, for all five quintiles. " _n
file write tablewrite "}" _n "\end{figure}" _n

file close tablewrite

}

****************************************************************** 
****************************************************************** 

